def save_job_results(geocoder, job_id):
    """ Download and save to S3 results for completed jobs. """
    logging.info('Saving results for %s to S3' % job_id)
    finished_folder = 'geocode_finished_jobs'
    pending_folder = 'geocode_pending_jobs'

    connection = boto.connect_s3()
    bucket = connection.get_bucket(GEO_BUCKET)
    old_key = bucket.get_key('%s/%s' % (pending_folder, job_id))
    new_name = old_key.get_contents_as_string()

    new_key = Key(bucket)
    new_key.key = '%s/%s' % (finished_folder, new_name)

    results = geocoder.get_job_results(job_id)
    result_string = StringIO.StringIO()
    writer = DictWriter(result_string, fieldnames=results[0].keys())
    writer.writeheader()
    writer.writerows(results)
    result_string.seek(0)

    email_address = old_key.get_metadata('email')
    if email_address:
        new_key.set_metadata('email', email_address)
        send_email_notification(
            email_address,
            geocoder.get_job_statuses(job_id=job_id),
            new_name,
            'finished')

    new_key.set_contents_from_string(result_string.getvalue())
    new_key.make_public()
    old_key.delete()

def main(argv):
    uname, pwd, filelist, ifname, ofname = getParms()

    # try opening the files
    try:
        # scl enable python27 bash
        # to allow multiple openings on one line
        with open(filelist, "rb") as fhl, open(ifname, "rb") as fhi, open(ofname, "wb") as fho:
            # read in the list of filenames to insert
            d = {}
            for line in fhl:
                fname = line.split('/')[-1].rstrip()  # filename points to folder
                parts = splitFilename(fname)
                crawldir = parts.group(1)
                if crawldir in d:
                    d[crawldir].append(line.rstrip())
                else:
                    d[crawldir] = [line.rstrip()]

            fields = ['identifier', 'filename', 'folder', 'date_created', 'checksum',
                      'series_number', 'creating_body', 'crawl_start', 'crawl_end',
                      'filesize', 'unit']
            reader = DictReader(fhi, fieldnames=fields)
            writer = DictWriter(fho, delimiter=',', fieldnames=fields)
            writer.writerow(dict((fn, fn) for fn in fields))
            print "[INFO] Opened files successfully."

            insertFiles(uname, pwd, d, reader, writer)
    except IOError as e:
        print "[IOERROR] " + str(e)

def run_queries(session, state):
    query_inputs = term_handler(state)
    combinations = cartesian_product(query_inputs)
    for query in combinations:
        PARAMS.update(query)
        logger.info('query')
        sleep(SLEEP_TIME)
        page = session.get(url=OB_BASE % SEARCH_URL, params=PARAMS)
        logger.info('got page')
        pricing_data = page.json()
        with open(WRITE_FILE_PATH, 'wb') as output_file:
            fieldnames = pricing_data['PricingRecords'][0].keys()
            fieldnames.append('Scenario')
            print 'FIELDNAMES %s' % fieldnames
            logger.info('header %s' % fieldnames)
            csv_output = DictWriter(output_file, fieldnames=fieldnames)
            csv_output.writeheader()
            for row in pricing_data['PricingRecords']:
                row['Scenario'] = '{msa}|{product}{purpose}{amount}{ltv}{fico}LD30IO0{term}'.format(
                    msa=query_inputs['MSALocation_Index'][query['MSALocation_Index']],
                    product=query_inputs["ProductType"][query["ProductType"]],
                    purpose=query_inputs["Purpose"][query["Purpose"]],
                    amount=query_inputs["LoanAmount"][query["LoanAmount"]],
                    ltv=query_inputs["LTV"][query["LTV"]],
                    fico=query_inputs["FICO"][query["FICO"]],
                    term=query_inputs["Term"][query["Term"]])
                logger.info('adding row %s' % row)
                csv_output.writerow(row)

def main(infile, outfile):
    with open(infile) as inf, open(outfile, "w") as outf:
        r = DictReader(inf)
        rows = [process(row) for row in r]
        w = DictWriter(outf, fieldnames=rows[0].keys())
        w.writeheader()
        w.writerows(rows)

def run(args):
    opts = parse_args(args)
    opts.prefix = opts.prefix or opts.graph.split('.', 1)[0]
    sheets = {}
    sheet_headers = {}
    try:
        with file(opts.graph) as csvfile:
            for row in reader(csvfile):
                fv = dict(column.split('=', 1) for column in row)
                entity_type = fv.pop('Entity Type')
                headers = fv.keys()
                if entity_type not in sheets:
                    sheets[entity_type] = [fv]
                    sheet_headers[entity_type] = set(headers)
                    continue
                else:
                    sheets[entity_type].append(fv)
                if len(headers) > len(sheet_headers[entity_type]):
                    # set.union() returns a new set; update() mutates in place as intended
                    sheet_headers[entity_type].update(headers)
        for entity_type in sheets:
            with open('%s_%s.csv' % (opts.prefix, entity_type), 'wb') as csvfile:
                csv = DictWriter(csvfile, sheet_headers[entity_type])
                csv.writeheader()
                csv.writerows(sheets[entity_type])
    except IOError, e:
        print 'csv2sheets: %s' % e
        exit(-1)

def writePredictions(self):
    print "In writePredictions"
    o = DictWriter(open("predictions.csv", "w"), ["id", "position"])
    o.writeheader()
    for ii, pp in zip([x["id"] for x in self.test], self.predictions):
        d = {"id": ii, "position": pp}
        o.writerow(d)

def test_01_importTab(self):
    # Check the collections first; if none of them exist, create them directly
    # client.drop_database(DB_INFO['DB_NAME'])
    SD.importTab()
    SD.copy_table()
    self.assertIn('FACT_ATTR', db.collection_names())
    self.assertIn('FACT_SERVICE', db.collection_names())
    self.assertIn('FACT_ATTR_SET', db.collection_names())
    self.assertIn('FACT_SCENE', db.collection_names())

    # If they exist, check the id first: insert if it is missing, otherwise update
    L = list()
    table = 'FACT_ATTR'
    filepath = os.path.sep.join([os.path.abspath(Const.DATA_IN), table])
    with open(filepath, 'r') as f:
        dReader = DictReader(f)
        L = [i for i in dReader]
    L[-1]['attr_set_id'] = 1
    L[-1]['value'] = 'rampage'
    L.append({'_id': 4, 'name': 'attr4', 'type_id': 6, 'value': 'test', 'attr_set_id': 2})
    with open(filepath, 'w') as f:
        titles = L[-1].keys()
        dwriter = DictWriter(f, titles)
        header = dict(zip(titles, titles))
        dwriter.writerow(header)
        dwriter.writerows(L)
    SD.importTab()

    # Test the query here as well while we're at it
    match = {'_id': {'$in': [3, 4]}}
    rs = list(db[table].find(match))
    self.assertEqual(len(rs), 2)
    self.assertEqual(rs[-2]['attr_set_id'], 1)
    self.assertEqual(rs[-2]['value'], 'rampage')

def write_csv(self, array, fname, delimiter=":"):
    with open(fname, "w") as f:
        fieldnames = list(array[0].keys())
        writer = DictWriter(f, delimiter=delimiter, lineterminator="\n", fieldnames=fieldnames)
        writer.writerow(dict((field, field) for field in fieldnames))
        for row in array:
            writer.writerow(row)

def write_report(report):
    f = open(report.filename, 'wb')
    print >>f, ','.join(report.columns)
    writer = DictWriter(f, report.columns)
    for row in report.rows:
        writer.writerow(_encode_row(row))
    f.close()

def main(argv):
    mountpoint, ifname, ofname = getParms()
    if not mountpoint.endswith('/'):
        mountpoint = mountpoint + '/'
    metaname = mountpoint + 'RW_32/metadata_v7.csv'

    # try opening the files
    try:
        # scl enable python27 bash
        # to allow multiple openings on one line
        with open(metaname, "rb") as mi, open(ifname, "rb") as cl, open(ofname, "wb") as mo:
            fields = ['identifier', 'filename', 'folder', 'date_created', 'checksum',
                      'series_number', 'creating_body', 'crawl_start', 'crawl_end',
                      'filesize', 'unit']
            all_fields = fields + ['date_archivist_note', 'archivist_note']
            metareader = DictReader(mi, fieldnames=fields)
            creader = reader(cl)

            # will always be tiny wrt metadata so slurp
            corrupt = {}
            for row in creader:
                corrupt[row[0]] = [row[1], row[2]]

            writer = DictWriter(mo, delimiter=',', fieldnames=all_fields)
            writer.writerow(dict((fn, fn) for fn in all_fields))
            print "[INFO] Opened files successfully."

            modifyMetadata(metareader, corrupt, writer)
    except IOError as e:
        print "[IOERROR] " + str(e)

def _stats_data_csv(user_profile, req_input, client, ignored, stats_type, is_custom):

    n_type_keys = {
        'mean': ['start', 'stop', 'service_name', 'mean', 'mean_all_services',
                 'usage_perc_all_services', 'time_perc_all_services',
                 'all_services_usage', 'mean_trend'],
        'usage': ['start', 'stop', 'service_name', 'usage', 'rate',
                  'usage_perc_all_services', 'time_perc_all_services',
                  'all_services_usage', 'usage_trend'],
    }

    buff = StringIO()
    writer = DictWriter(buff, n_type_keys[req_input.n_type], extrasaction='ignore')
    writer.writeheader()

    for stat in _get_stats(client, req_input.utc_start, req_input.utc_stop,
                           req_input.n, req_input.n_type, stats_type):
        d = stat.to_dict()
        d['start'] = req_input.user_start
        d['stop'] = req_input.user_stop if stats_type == 'trends' or is_custom else ''
        writer.writerow(d)

    out = buff.getvalue()
    buff.close()

    response = HttpResponse(out, content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename={}'.format('zato-stats.csv')

    return response

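# The snippet above relies on DictWriter(..., extrasaction='ignore') so that keys in
# stat.to_dict() that are not listed in the fieldnames are silently dropped instead of
# raising ValueError. A minimal, self-contained sketch of that behavior (standalone
# illustration only, not part of the code above):
from csv import DictWriter
from io import StringIO

buff = StringIO()
writer = DictWriter(buff, fieldnames=['start', 'stop'], extrasaction='ignore')
writer.writeheader()
# 'service_name' is not a declared fieldname; with extrasaction='ignore' it is skipped,
# whereas the default extrasaction='raise' would make this call raise ValueError.
writer.writerow({'start': '2024-01-01', 'stop': '2024-01-02', 'service_name': 'demo'})
print(buff.getvalue())
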
def test_behavior_strategy(b: Behavior, s: Strategy, size=20):
    TRIALS = 10**2
    results = []
    start = time()
    dynamic = False
    for _ in range(TRIALS):
        r = MemoryManager(s, size, dynamic).handle_string(generate_list(b))
        results.append(r)
    end = time()

    avg_time = (end - start) / TRIALS
    print('Average time: ', avg_time)
    print('Minimum no. page faults: ', min(results))
    print('Maximum no. page faults: ', max(results))
    avg = sum(results) / len(results)
    print('Average no. page faults: ', avg)

    with open('benchmarks.csv', 'r') as record_file:
        data = DictReader(record_file)
        entries = [i for i in data]

    entry_fields = ['Behavior', 'Strategy', 'Res. Set Size', 'Faults']
    new_entry = {'Behavior': b.name, 'Strategy': s.name, 'Res. Set Size': size, 'Faults': int(avg)}
    entries.append(new_entry)
    entries = sorted(entries, key=itemgetter('Behavior', 'Strategy'))

    with open('benchmarks.csv', 'w', newline='') as record_file:
        writer = DictWriter(record_file, entry_fields)
        writer.writeheader()
        writer.writerows(entries)

def export_feature_matrix_csv(feature_matrix, path, delimiter=','):
    """
    Save a FeatureMatrix as a column-delimited text file

    Parameters
    ----------
    feature_matrix : FeatureMatrix
        FeatureMatrix to save to text file
    path : str
        Full path to write text file
    delimiter : str
        Character to mark boundaries between columns. Defaults to ','
    """
    with open(path, encoding='utf-8-sig', mode='w') as f:
        header = ['symbol'] + feature_matrix.features
        writer = DictWriter(f, header, delimiter=delimiter)
        writer.writerow({h: h for h in header})
        for seg in feature_matrix.segments:
            # If FeatureMatrix uses dictionaries
            # outdict = feature_matrix[seg]
            # outdict['symbol'] = seg
            # writer.writerow(outdict)
            if seg in ['#', '']:
                continue
            featline = feature_matrix.seg_to_feat_line(seg)
            outdict = {header[i]: featline[i] for i in range(len(header))}
            writer.writerow(outdict)

def job_result_csv(job_id):
    db_session = db.get_session()
    db_job = db_session.query(PersistentJob).get(job_id)
    if not db_job:
        return json_error('no task exists with id: {0}'.format(job_id))
    celery_task = Job.task.AsyncResult(db_job.result_key)
    if celery_task.ready():
        task_result = celery_task.get()
        csv_io = StringIO()
        if task_result:
            # if task_result is not empty find header in first row
            fieldnames = ['user_id'] + sorted(task_result.values()[0].keys())
        else:
            fieldnames = ['user_id']
        writer = DictWriter(csv_io, fieldnames)

        # fold user_id into dict so we can use DictWriter to escape things
        task_rows = []
        for user_id, row in task_result.iteritems():
            row['user_id'] = user_id
            task_rows.append(row)

        writer.writeheader()
        writer.writerows(task_rows)
        app.logger.debug('celery task is ready! returning actual result:\n%s', csv_io.getvalue())
        return Response(csv_io.getvalue(), mimetype='text/csv')
    else:
        return json_response(status=celery_task.status)

def csv_results(self, csv_file, histogram_size=None):
    specs = self.token_categories
    names_from_type = {
        spec.typename: spec.name.first_original().value.encode('utf-8')
        for spec in specs
    }
    spec_names = names_from_type.values()
    spec_names.sort()
    spec_names.insert(0, "idea")
    dw = DictWriter(csv_file, spec_names, dialect='excel', delimiter=';')
    dw.writeheader()
    by_idea = self._gather_results()
    values = {
        votable_id: self.results_for(voting_results)
        for (votable_id, voting_results) in by_idea.iteritems()
    }
    idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
        Idea.id.in_(by_idea.keys())))
    idea_names = {
        id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
    ordered_idea_ids = Idea.visit_idea_ids_depth_first(
        AppendingVisitor(), self.get_discussion_id())
    ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
    for idea_id in ordered_idea_ids:
        base = values[idea_id]
        sums = {names_from_type[k]: v for (k, v) in base['sums'].iteritems()}
        sums['idea'] = idea_names[idea_id]
        dw.writerow(sums)

def output_results(poi_result_set, screen=True, outfile=None):
    """
    Outputs unified DBSCAN results to screen or csv file.

    The screen only shows major data elements. The CSV file has the complete
    dictionary (i.e., base dictionary plus ZOA attributes for each POI).
    """
    assert not isinstance(poi_result_set, basestring), 'POI result set is not list or tuple'

    if screen:
        print "\nZOAs by POI"
        print "=" * 80,
        for poi in poi_result_set:
            print "\nLocation:\t%s" % poi[s.NAME_KEY]
            print "Address:\t%s" % poi[s.ADDR_KEY]
            print "Neighborhood:\t%s" % poi[s.NBHD_KEY]
            print "Coordinates:\t%.4f, %.4f" % (poi[s.LAT_KEY], poi[s.LNG_KEY])
            print "ZOA ID:\t\t%d" % poi[s.ZOA_KEY]

    if outfile:
        assert isinstance(outfile, str), "Outfile name is not a string: %r" % outfile
        if outfile[-4:] != '.csv':
            outfile += '.csv'
        with open(outfile, 'wb') as f:
            target = DictWriter(f, poi_result_set[0].keys())
            target.writeheader()
            target.writerows(poi_result_set)
        print "\nWrote output to %s.\n" % outfile

def main():
    '''
    >>> main() # stuff happens
    '''
    args = parse_args()
    logging.basicConfig(filename=args.log, level=logging.INFO)

    input_otu_counts = defaultdict(lambda: defaultdict(lambda: 0))
    field_names = set()

    for input in args.inputs:
        with open(input) as handle:
            kraken_data = parse_kraken_file(handle)
            for row in kraken_data:
                field_names.add(row['ncbi_taxid'])
                input_otu_counts[input][row['ncbi_taxid']] += 1

    field_names = ['input'] + sorted([i for i in field_names])

    with open(args.output, 'w') as handle:
        writer = DictWriter(handle, fieldnames=field_names)
        writer.writeheader()
        for input, otu_counts in list(input_otu_counts.items()):
            otu_counts['input'] = input
            writer.writerow(otu_counts)

def main():
    layout = construct_layout(OFF_PROPERTY_LAYOUT)
    header = get_active_header(OFF_PROPERTY_LAYOUT)

    # Prepare CSV output to stdout
    writer = DictWriter(stdout, fieldnames=header)
    writer.writeheader()

    parse = Struct(layout).unpack_from
    struct_length = calcsize(layout)

    for line in get_stdin_bytes().readlines():
        # Ensure string length is what the deconstructor expects
        if len(line) != struct_length:
            line = '{:<{}s}'.format(line.decode(), struct_length).encode()

        # Deconstruct fixed-width string
        row = parse(line)

        # Decode each value
        row = (v.decode('ascii', 'ignore') for v in row)

        # Trim whitespace in each field
        row = [field.strip() for field in row]

        # Convert to dict using header
        row = dict(zip(header, row))

        writer.writerow(row)

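# The loop above turns fixed-width byte records into dicts by pairing struct field slices
# with the header names. A tiny self-contained sketch of the same idea, using a made-up
# two-field layout ('10s5s' = 10-byte name, 5-byte code) and inline sample records:
from csv import DictWriter
from struct import Struct
from sys import stdout

layout = Struct('10s5s')
header = ['name', 'code']
writer = DictWriter(stdout, fieldnames=header)
writer.writeheader()

for line in [b'Springfld 62701', b'Shelbyvl  62565']:
    # unpack the fixed-width slices, then decode and strip each field
    values = (v.decode('ascii', 'ignore').strip() for v in layout.unpack_from(line))
    writer.writerow(dict(zip(header, values)))
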
def get_vf_precincts(loc_data, precinct_data):
    with open(Files.VF_PRECINCTS.format(**loc_data), "w") as vfp_w, \
            open(Files.VF_EX_PRECINCTS.format(**loc_data), "w") as vfep_w:
        vfp_writer = DictWriter(vfp_w, fieldnames=Headers.VFP)
        vfp_writer.writeheader()
        vfep_writer = DictWriter(vfep_w, fieldnames=Headers.VFEP)
        vfep_writer.writeheader()
        for key, vfp_dict in precinct_data.iteritems():
            zips = vfp_dict.pop('zips')
            max_count = 0
            max_zip = 0
            total_count = 0
            for zip_val, zip_count in zips.iteritems():
                total_count += zip_count
                if zip_count > max_count:
                    max_count = zip_count
                    max_zip = zip_val
            vfp_dict['vf_precinct_zip'] = max_zip
            vfp_dict['vf_precinct_count'] = total_count
            examples = vfp_dict.pop('examples')
            vfp_writer.writerow(vfp_dict)
            ex_count = 1
            for ex in examples:
                for key in Conversions.VF_EX:
                    vfp_dict[Prefixes.VFP_EX.format(ex_count) + key] = ex[key]
                ex_count += 1
            vfep_writer.writerow(vfp_dict)

def run():
    output = open(sys.argv[1], 'w')
    writer = DictWriter(output, fieldnames=['uid', 'data'])
    writer.writeheader()

    db = DB(dbconfig)
    for uid in fetch_users(db):
        data = fetch_user_location_logs(uid, db)
        locations = merge_locations(data)
        matrix = generate_matrix(locations)
        semantic_data = fetch_semantic_data(list(matrix.keys()))
        semantic_dict = {}
        for row in semantic_data:
            semantic_dict[row['location']] = clean_tags(row['tags'], 5)

        tag_matrix = {}
        for location, proba in list(matrix.items()):
            tag_dict = semantic_dict[location]
            tag_weight = sum(v for v in list(tag_dict.values()))
            if tag_weight == 0:
                continue
            for tag, cnt in list(tag_dict.items()):
                tag_matrix.setdefault(tag, [0] * 48)
                for i in range(48):
                    tag_matrix[tag][i] += (proba[i] * cnt + 0.001) / (tag_weight + 0.001)

        writer.writerow({
            'uid': uid,
            'data': json.dumps(tag_matrix)
        })
    output.close()

def process_vf(loc_data):
    precinct_data = {}
    with open(Files.VF_CUT.format(**loc_data), "r") as r, \
            open(Files.VF_DEDUPED.format(**loc_data), "w") as w:
        reader = DictReader(r, dialect='excel-tab')
        writer = DictWriter(w, fieldnames=Headers.VF_DEDUPED)
        writer.writeheader()
        vf_hashes = set()
        p_count = 0
        for row in reader:
            if len(loc_data['county']) > 0 and not row['vf_county_name'].upper() == loc_data['county'].upper():
                continue
            vf_hash = get_hash(row, HashFields.VF)
            if vf_hash in vf_hashes:
                continue
            vf_hashes.add(vf_hash)
            vfp_hash = get_hash(row, HashFields.VFP)
            row_zip = row['vf_reg_cass_zip']
            if vfp_hash not in precinct_data:
                p_count += 1
                precinct_data[vfp_hash] = get_conversion(row, Conversions.VFP)
                precinct_data[vfp_hash]['vf_precinct_id'] = Prefixes.PRECINCT + str(p_count)
                precinct_data[vfp_hash]['zips'] = {row_zip: 1}
                precinct_data[vfp_hash]['examples'] = []
            elif row_zip not in precinct_data[vfp_hash]['zips']:
                precinct_data[vfp_hash]['zips'][row_zip] = 1
            else:
                precinct_data[vfp_hash]['zips'][row_zip] += 1
            vf_output = get_conversion(row, Conversions.VF)
            if len(precinct_data[vfp_hash]['examples']) < 5:
                precinct_data[vfp_hash]['examples'].append(vf_output)
            vf_output["vf_precinct_id"] = precinct_data[vfp_hash]['vf_precinct_id']
            vf_output["vf_id"] = str(Prefixes.VF + row["voterbase_id"][3:])
            writer.writerow(vf_output)
    return precinct_data

def plot_file(filename1):
    base_name = os.path.basename(filename1)
    name_parts = base_name.split('_')
    work_path = os.path.dirname(__file__)
    scores_filename = os.path.join(
        work_path,
        '_'.join(name_parts[:2] + ['v3loop_scores.csv']))
    if os.path.exists(scores_filename):
        with open(scores_filename) as f:
            reader = DictReader(f)
            score_rows = [list(map(int, row))
                          for row in map(itemgetter('score', 'count'), reader)]
    else:
        source1 = os.path.join('micall/tests/working/v3loop_alignment_scores/', filename1)
        source2 = source1.replace('_R1_', '_R2_')
        start = datetime.now()
        with open(source1) as fastq1, open(source2) as fastq2:
            score_counts = align_reads(fastq1, fastq2)
        print('{}: {}'.format(datetime.now() - start, filename1))
        score_rows = sorted(score_counts.items())
        with open(scores_filename, 'w') as scores_csv:
            writer = DictWriter(scores_csv,
                                ('score', 'count'),
                                lineterminator=os.linesep)
            writer.writeheader()
            for score, count in score_rows:
                writer.writerow(dict(score=score, count=count))
    scores = [row[0] for row in score_rows]
    counts = [row[1] for row in score_rows]
    total_count = float(sum(counts))
    fractions = [count / total_count for count in counts]
    plt.plot(scores, fractions, label=base_name.split('_')[0], alpha=0.7)

def dump_csv(pages, options):
    """Dump in CSV format.

    ``pages`` is an iterable of (field, value) tuples. It's assumed that
    the same fields are used in each tuple.
    """
    from itertools import chain
    from csv import DictWriter
    from sys import stdout

    pages = iter(pages)
    try:
        first_row = pages.next()
    except StopIteration:
        return

    fields = [item[0] for item in first_row]
    rows = chain((first_row,), pages)
    dicts = (dict(page) for page in rows)
    dicts = (process_page(row) for row in dicts)

    def validate_row_length(row_dict):
        if len(row_dict) != len(fields):
            raise DataValidationError(
                'Inconsistent number of fields in row {0}.\n'
                'Fields: {1}'.format(row_dict, fields))
        return row_dict
    dicts = (validate_row_length(row) for row in dicts)

    writer = DictWriter(stdout, fields, dialect='excel-tab')
    writer.writerow(dict((v, v) for v in fields))
    writer.writerows(dicts)

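# dump_csv above (like write_csv and the fixed-width exporters here) writes its header by
# hand with writer.writerow(dict((v, v) for v in fields)), the pre-2.7 idiom;
# DictWriter.writeheader(), added in Python 2.7, is the equivalent one-liner. A minimal
# sketch of the modern form (illustrative only, the file name is arbitrary):
from csv import DictWriter

fields = ['name', 'value']
with open('header_demo.csv', 'w', newline='') as f:
    writer = DictWriter(f, fieldnames=fields, dialect='excel-tab')
    writer.writeheader()  # same output as writer.writerow(dict((v, v) for v in fields))
    writer.writerow({'name': 'alpha', 'value': 1})
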
def convert_powertracker_log_to_csv(path):
    """
    This function creates a CSV file (to ./results) from a PowerTracker log file (from ./data).
    This is inspired from https://github.com/sieben/makesense/blob/master/makesense/parser.py.

    :param path: path to the experiment (including [with-|without-malicious])
    """
    platforms = [p.capitalize() for p in get_available_platforms()]
    data, results = join(path, 'data'), join(path, 'results')
    with open(join(data, 'powertracker.log')) as f:
        log = f.read()
    iterables, fields = [], ['mote_id']
    for it in PT_ITEMS:
        time_field = '{}_time'.format(it)
        iterables.append(finditer(PT_REGEX.format('|'.join(platforms), it.upper(), time_field),
                                  log, MULTILINE))
        fields.append(time_field)
    with open(join(results, 'powertracker.csv'), 'w') as f:
        writer = DictWriter(f, delimiter=',', fieldnames=fields)
        writer.writeheader()
        for matches in zip(*iterables):
            row = {}
            for m in matches:
                row.update((k, int(v)) for k, v in m.groupdict().items())
            for it in PT_ITEMS:
                time_field = '{}_time'.format(it)
                row[time_field] = float(row[time_field] / 10 ** 6)
            writer.writerow(row)

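# convert_powertracker_log_to_csv above builds each CSV row from regex named groups via
# m.groupdict(). A compact sketch of that pattern with a made-up log format and regex
# (field names and sample text are placeholders, not the real PT_REGEX):
from csv import DictWriter
from re import finditer
from sys import stdout

log = "mote 1 MONITORED 1200000 us\nmote 2 MONITORED 3400000 us\n"
pattern = r'mote (?P<mote_id>\d+) MONITORED (?P<monitored_time>\d+) us'

writer = DictWriter(stdout, fieldnames=['mote_id', 'monitored_time'])
writer.writeheader()
for m in finditer(pattern, log):
    # groupdict() maps each named group to its captured string
    row = {k: int(v) for k, v in m.groupdict().items()}
    row['monitored_time'] = row['monitored_time'] / 10 ** 6  # microseconds -> seconds
    writer.writerow(row)
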
def main(args):
    with open("users.csv", "w") as f:
        fieldnames = ["username", "first_name", "last_name", "email", "phone", "institution", "voro_account"]
        csvFile = DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
        # write header row
        field_dict = dict([(x, x.capitalize()) for x in fieldnames])
        csvFile.writerow(field_dict)
        for user in User.objects.all():
            # look up associated profile & inst
            try:
                profile = user.get_profile()
                phone = profile.phone
                voro_account = profile.voroEAD_account
            except UserProfile.DoesNotExist:
                phone = ""
                voro_account = False
            user.__dict__["phone"] = phone
            user.__dict__["voro_account"] = voro_account
            # inst through group
            groups = user.groups.all()
            instname = ""
            if len(groups):
                firstgroup = user.groups.all()[0]
                grpprofile = firstgroup.groupprofile
                insts = grpprofile.institutions.all()
                if len(insts):
                    instname = insts[0].name
                else:
                    instname = ""
            user.__dict__["institution"] = instname.encode("utf-8")
            csvFile.writerow(user.__dict__)

def handle(self, *args, **options):
    print('Start.')
    sampled_comments = []
    feeds = Facebook_Feed.objects.all()
    from_date = dateutil.parser.parse(options['from_date'])
    to_date = dateutil.parser.parse(options['to_date'])
    sample_size = options['sample_size']
    batch_size = options['batch_size']
    for i, feed in enumerate(feeds):
        print('working on feed {} of {}'.format(i + 1, feeds.count()))
        residual = None
        sampled_comments_for_feed = []
        statuses_for_feed = Facebook_Status.objects.filter(feed__id=feed.id).filter(
            published__range=[from_date, to_date]).order_by(
            'comment_count')
        for i, status in enumerate(statuses_for_feed):
            is_last = i + 1 == len(statuses_for_feed)
            samples_for_status, residual = self.sample(status, is_last, residual,
                                                       sample_size=sample_size,
                                                       batch_size=batch_size)
            sampled_comments_for_feed += samples_for_status
        sampled_comments += sampled_comments_for_feed
    print('total_comments:', len(sampled_comments))
    with open('{}.csv'.format(args[0]), 'wb') as f:
        fieldnames = ['comment_id', 'status_id']
        writer = DictWriter(f, fieldnames=fieldnames)
        writer.writerow({'comment_id': 'comment_id', 'status_id': 'status_id'})
        for row in sampled_comments:
            writer.writerow({'comment_id': row['comment_id'], 'status_id': row['parent__status_id']})
    print('Done.')

def handle_noargs(self, **options):
    print "in the command..."
    comment_keys = ('user_key', 'g', 'agroup', 'user_key', 'experiment_slug', 'variant', 'via')
    petition_headers = comment_keys + ('name', 'email')
    # Petition signatories from the first two experiments
    for filename, url_path in [
            ('petition-1.csv', '/county-performance/petition'),
            ('petition-2.csv', '/county-performance-2/petition'),
    ]:
        with open(filename, "wb") as f:
            writer = DictWriter(f, petition_headers)
            writer.writeheader()
            for f in Feedback.objects.filter(url__endswith=url_path):
                data, comment = unpack_comment(f.comment)
                row_data = data.copy()
                row_data['name'] = comment
                row_data['email'] = f.email
                writer.writerow(row_data)
    senate_headers = comment_keys + ('comment',)
    for filename, url_path in [
            ('senate-1.csv', '/county-performance/senate'),
            ('senate-2.csv', '/county-performance-2/senate'),
    ]:
        with open(filename, "wb") as f:
            writer = DictWriter(f, senate_headers)
            writer.writeheader()
            for f in Feedback.objects.filter(url__endswith=url_path):
                data, comment = unpack_comment(f.comment)
                row_data = data.copy()
                row_data['comment'] = comment
                writer.writerow(row_data)

def output_list(to_save, path):
    head = to_save[0].keys()
    with open(path, 'w') as f:
        csvwriter = DictWriter(f, head, delimiter='\t')
        csvwriter.writerow({x: x for x in head})
        for l in to_save:
            csvwriter.writerow(l)

def make_classify_report_from_lima(clips_filename, primer_index_dict, flnc_bam=None):
    """
    clips format:
    """
    if flnc_bam is not None:
        flnc_len_dict = dict((r.qname, r.qlen)
                             for r in pysam.Samfile(flnc_bam, check_sq=False))
    else:
        flnc_len_dict = None
        print >> sys.stderr, "WARNING: FLNC BAM not provided. `polyAlen` and `insertlen` fields will be `NA`."

    f = open('classify_report.csv', 'w')
    writer = DictWriter(f, fieldnames=report_fields, delimiter=',')
    writer.writeheader()

    first_of_pair_seen = False
    rec = {'id': None, 'strand': None, 'fivelen': None, 'threelen': None,
           'polyAlen': None, 'insertlen': None, 'primer_index': None, 'primer': None}

    for r in SeqIO.parse(open(clips_filename), 'fasta'):
        m = clip_rex.match(r.description)
        zmw = m.group(1) + '/ccs'
        s, e = int(m.group(2)), int(m.group(3))
        bc = int(m.group(4))
        if primer_index_dict[bc][0] == '5p':
            p5 = bc
            rec['fivelen'], start5, end5 = e - s, s, e
        else:
            assert primer_index_dict[bc][0] == '3p'
            p3 = bc
            rec['threelen'], start3, end3 = e - s, s, e

        if first_of_pair_seen:  # both pairs seen, write out and reset
            assert rec['id'] == zmw
            rec['strand'] = '+' if end5 < end3 else '-'
            if flnc_len_dict is None or zmw not in flnc_len_dict:
                rec['insertlen'] = 'NA'
                rec['polyAlen'] = 'NA'
            else:
                rec['insertlen'] = flnc_len_dict[zmw]
                if rec['strand'] == '+':
                    rec['polyAlen'] = start3 - end5 - rec['insertlen']
                else:
                    rec['polyAlen'] = start5 - end3 - rec['insertlen']
            rec['primer'] = "{0}--{1}".format(primer_index_dict[p5][1], primer_index_dict[p3][1])
            rec['primer_index'] = "{0}--{1}".format(p5, p3)
            writer.writerow(rec)
            #f.write("{id},{len5},{len3},{lenA},{lenI},{pn5}--{pn3},{p5}--{p3}\n".format(\
            #    id=zmw, len5=len5, len3=len3,
            #    lenA=lenA, lenI=lenI,
            #    pn5=primer_index_dict[p5][1], pn3=primer_index_dict[p3][1],
            #    p5=p5, p3=p3))
            # reset variables
            first_of_pair_seen = False
            rec = {'id': None, 'strand': None, 'fivelen': None, 'threelen': None,
                   'polyAlen': None, 'insertlen': None, 'primer_index': None, 'primer': None}
            p5, p3, start5, end5, start3, end3 = None, None, None, None, None, None
        else:  # first of the pair
            rec['id'] = zmw
            first_of_pair_seen = True

    f.close()
    print >> sys.stderr, "Classify report written to: {0}".format(f.name)

def run_all():
    parser = argparse.ArgumentParser()
    parser.add_argument('--point_map_file', '-p', default='point_map.csv')
    parser.add_argument('--nghds_outfile', default='outputs/nghds_num_tweets_by_hour.csv')
    parser.add_argument('--bins_outfile', default='outputs/bins_num_tweets_by_hour.csv')
    args = parser.parse_args()

    point_nghd_map = load_point_nghd_map(args.point_map_file)

    bin_hr_counts = Counter()   # (lat, lon, hr) -> count
    nghd_hr_counts = Counter()  # (nghd, hr) -> count
    ctr = 0
    for tweet in db.tweet_pgh.find():
        ctr += 1
        if ctr % 1000 == 0:
            print '%d tweets processed' % ctr
        lat = tweet['coordinates']['coordinates'][1]
        lon = tweet['coordinates']['coordinates'][0]
        (lat, lon) = (round(float(lat), 3), round(float(lon), 3))
        if (lat, lon) in point_nghd_map:
            nghd = point_nghd_map[(lat, lon)]
        else:
            nghd = 'Outside Pittsburgh'
        hr = util.util.get_tweet_hour(tweet)
        bin_hr_counts[(lat, lon, hr)] += 1
        nghd_hr_counts[(nghd, hr)] += 1

    pprint.pprint(nghd_hr_counts)
    bin_writer = DictWriter(open(args.bins_outfile, 'w'), ['lat', 'lon', 'hour', 'count'])
    bin_writer.writeheader()
    for bin_hr, count in bin_hr_counts.items():
        bin_writer.writerow({'lat': bin_hr[0], 'lon': bin_hr[1], 'hour': bin_hr[2], 'count': count})
    nghd_writer = DictWriter(open(args.nghds_outfile, 'w'), ['nghd', 'hour', 'count'])
    nghd_writer.writeheader()
    for nghd_hr, count in nghd_hr_counts.items():
        nghd_writer.writerow({'nghd': nghd_hr[0], 'hour': nghd_hr[1], 'count': count})

from csv import DictWriter

with open('final.csv', 'w') as f:
    csv_writer = DictWriter(f, fieldnames=['first_name', 'last_name', 'age'])
    csv_writer.writerow({
        'first_name': 'sffssd',
        'last_name': 'dfgh',
        'age': 500
    })

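# The minimal example above writes a single data row and no header. If a header row and
# several records are wanted, a sketch along the same lines (file name and values are
# placeholders) would be:
from csv import DictWriter

people = [
    {'first_name': 'Ada', 'last_name': 'Lovelace', 'age': 36},
    {'first_name': 'Alan', 'last_name': 'Turing', 'age': 41},
]

# newline='' avoids blank lines between rows on Windows when writing csv in text mode
with open('final_with_header.csv', 'w', newline='') as f:
    csv_writer = DictWriter(f, fieldnames=['first_name', 'last_name', 'age'])
    csv_writer.writeheader()
    csv_writer.writerows(people)
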
def main():
    # parse the command line arguments
    arg_parser = ConfigArgumentParser(description=__doc__)

    arg_parser.add_argument("device_id", type=int,
                            help="Device ID of the target device")

    arg_parser.add_argument(
        "--address",
        help="Address of target device, may be needed to help route initial request to device.")

    arg_parser.add_argument("--out-file", type=argparse.FileType('wb'),
                            help="Optional output file for configuration",
                            default=sys.stdout)

    arg_parser.add_argument(
        "--max_range_report", nargs='?', type=float,
        help='Affects how very large numbers are reported in the "Unit Details" column of the output. '
             'Does not affect driver behavior.',
        default=1.0e+20)

    args = arg_parser.parse_args()

    _log.debug("initialization")
    _log.debug(" - args: %r", args)

    # make a device object
    this_device = LocalDeviceObject(
        objectName=args.ini.objectname,
        objectIdentifier=int(args.ini.objectidentifier),
        maxApduLengthAccepted=int(args.ini.maxapdulengthaccepted),
        segmentationSupported=args.ini.segmentationsupported,
        vendorIdentifier=int(args.ini.vendoridentifier),
    )

    # make a simple application
    this_application = SynchronousApplication(this_device, args.ini.address)

    _log.debug("starting build")

    result = get_iam(this_application, args.device_id, args.address)

    # request = WhoIsRequest()
    # request.pduDestination = target_address
    # result = this_application.make_request(request, expect_confirmation=False)

    # if not isinstance(result, IAmRequest):
    #     result.debug_contents()
    #     raise TypeError("Error making WhoIs request, try running again.")

    # device_type, device_instance = result.iAmDeviceIdentifier
    # if device_type != 'device':
    #     raise DecodingError("invalid object type")

    target_address = result.pduSource

    _log.debug('pduSource = ' + repr(result.pduSource))
    _log.debug('iAmDeviceIdentifier = ' + str(result.iAmDeviceIdentifier))
    _log.debug('maxAPDULengthAccepted = ' + str(result.maxAPDULengthAccepted))
    _log.debug('segmentationSupported = ' + str(result.segmentationSupported))
    _log.debug('vendorID = ' + str(result.vendorID))

    device_id = result.iAmDeviceIdentifier[1]

    try:
        device_name = read_prop(this_application, target_address, "device", device_id, "objectName")
        _log.debug('device_name = ' + str(device_name))
    except TypeError:
        _log.debug('device missing objectName')

    try:
        device_description = read_prop(this_application, target_address, "device", device_id, "description")
        _log.debug('description = ' + str(device_description))
    except TypeError:
        _log.debug('device missing description')

    config_writer = DictWriter(
        args.out_file,
        ('Reference Point Name', 'Volttron Point Name', 'Units', 'Unit Details',
         'BACnet Object Type', 'Property', 'Writable', 'Index', 'Write Priority', 'Notes'))

    config_writer.writeheader()

    try:
        objectCount = read_prop(this_application, target_address, "device", device_id,
                                "objectList", index=0)
        list_property = "objectList"
    except TypeError:
        objectCount = read_prop(this_application, target_address, "device", device_id,
                                "structuredObjectList", index=0)
        list_property = "structuredObjectList"

    _log.debug('objectCount = ' + str(objectCount))

    for object_index in xrange(1, objectCount + 1):
        _log.debug('object_device_index = ' + repr(object_index))

        bac_object = read_prop(this_application, target_address, "device", device_id,
                               list_property, index=object_index)

        obj_type, index = bac_object

        try:
            process_object(this_application, target_address, obj_type, index,
                           args.max_range_report, config_writer)
        except:
            _log.debug("Unexpected error processing object: {} {}".format(obj_type, index))
            _log.debug(traceback.format_exc())

def csv_results(self, csv_file, histogram_size=None):
    histogram_size = histogram_size or 10
    bins = range(histogram_size)
    bins.insert(0, "idea")
    bins.extend(["avg", "std_dev"])
    dw = DictWriter(csv_file, bins, dialect='excel', delimiter=';')
    dw.writeheader()
    by_idea = self._gather_results()
    values = {
        votable_id: self.results_for(voting_results, histogram_size)
        for (votable_id, voting_results) in by_idea.iteritems()
    }
    idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
        Idea.id.in_(by_idea.keys())))
    idea_names = {
        id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
    ordered_idea_ids = Idea.visit_idea_ids_depth_first(
        AppendingVisitor(), self.get_discussion_id())
    ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
    for idea_id in ordered_idea_ids:
        base = values[idea_id]
        r = dict(enumerate(base['histogram']))
        r['idea'] = idea_names[idea_id]
        r['avg'] = base['avg']
        r['std_dev'] = base['std_dev']
        dw.writerow(r)