def save_results(results, path, header=None, mode='w'):
    """
    Writes results to path specified

    Parameters
    ----------
    results : dict
        the results to write
    path : str
        the path to the save file
    header : list
        Defaults to None
    mode : str
        defaults to 'w', or write. Can be 'a', append
    """
    if header is None:
        try:
            header = results.keys()
        except AttributeError:
            try:
                header = results[0].keys()
            except AttributeError:
                raise Exception('Could not get the column header from the list, please specify the header.')
    with open(path, mode, encoding='utf8') as f:
        writer = DictWriter(f, header)
        if mode != 'a':
            writer.writeheader()
        for line in results:
            writer.writerow({k: make_safe(line[k], '/') for k in header})
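# Hedged usage sketch (not part of the original source): save_results() is
# typically handed a list of dicts and infers the header from the first row.
# Assumes DictWriter is imported from csv and make_safe is defined at module
# level, as the function above expects; file name is made up for illustration.
example_rows = [{'name': 'alpha', 'score': 1}, {'name': 'beta', 'score': 2}]
save_results(example_rows, 'results.csv')            # write with inferred header
save_results(example_rows, 'results.csv', mode='a')  # append more rows, no second header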
def save_job_results(geocoder, job_id):
    """ Download and save to S3 results for completed jobs. """
    logging.info('Saving results for %s to S3' % job_id)
    finished_folder = 'geocode_finished_jobs'
    pending_folder = 'geocode_pending_jobs'
    connection = boto.connect_s3()
    bucket = connection.get_bucket(GEO_BUCKET)
    old_key = bucket.get_key('%s/%s' % (pending_folder, job_id))
    new_name = old_key.get_contents_as_string()
    new_key = Key(bucket)
    new_key.key = '%s/%s' % (finished_folder, new_name)
    results = geocoder.get_job_results(job_id)
    result_string = StringIO.StringIO()
    writer = DictWriter(result_string, fieldnames=results[0].keys())
    writer.writeheader()
    writer.writerows(results)
    result_string.seek(0)
    email_address = old_key.get_metadata('email')
    if email_address:
        new_key.set_metadata('email', email_address)
        send_email_notification(
            email_address,
            geocoder.get_job_statuses(job_id=job_id),
            new_name, 'finished')
    new_key.set_contents_from_string(result_string.getvalue())
    new_key.make_public()
    old_key.delete()
def write_rows(self, entries):
    """Write rows in the csv file"""
    with open(self.output_filename, 'w') as output_file:
        writer = DictWriter(output_file, FIELDNAMES)
        writer.writeheader()
        writer.writerows(entries)
def run_queries(session, state):
    query_inputs = term_handler(state)
    combinations = cartesian_product(query_inputs)
    for query in combinations:
        PARAMS.update(query)
        logger.info('query')
        sleep(SLEEP_TIME)
        page = session.get(url=OB_BASE % SEARCH_URL, params=PARAMS)
        logger.info('got page')
        pricing_data = page.json()
        with open(WRITE_FILE_PATH, 'wb') as output_file:
            fieldnames = pricing_data['PricingRecords'][0].keys()
            fieldnames.append('Scenario')
            print 'FIELDNAMES %s' % fieldnames
            logger.info('header %s' % fieldnames)
            csv_output = DictWriter(output_file, fieldnames=fieldnames)
            csv_output.writeheader()
            for row in pricing_data['PricingRecords']:
                row['Scenario'] = '{msa}|{product}{purpose}{amount}{ltv}{fico}LD30IO0{term}'.format(
                    msa=query_inputs['MSALocation_Index'][query['MSALocation_Index']],
                    product=query_inputs["ProductType"][query["ProductType"]],
                    purpose=query_inputs["Purpose"][query["Purpose"]],
                    amount=query_inputs["LoanAmount"][query["LoanAmount"]],
                    ltv=query_inputs["LTV"][query["LTV"]],
                    fico=query_inputs["FICO"][query["FICO"]],
                    term=query_inputs["Term"][query["Term"]])
                logger.info('adding row %s' % row)
                csv_output.writerow(row)
def csv_results(self, csv_file, histogram_size=None):
    specs = self.token_categories
    names_from_type = {
        spec.typename: spec.name.first_original().value.encode('utf-8')
        for spec in specs
    }
    spec_names = names_from_type.values()
    spec_names.sort()
    spec_names.insert(0, "idea")
    dw = DictWriter(csv_file, spec_names, dialect='excel', delimiter=';')
    dw.writeheader()
    by_idea = self._gather_results()
    values = {
        votable_id: self.results_for(voting_results)
        for (votable_id, voting_results) in by_idea.iteritems()
    }
    idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
        Idea.id.in_(by_idea.keys())))
    idea_names = {
        id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
    ordered_idea_ids = Idea.visit_idea_ids_depth_first(
        AppendingVisitor(), self.get_discussion_id())
    ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
    for idea_id in ordered_idea_ids:
        base = values[idea_id]
        sums = {names_from_type[k]: v for (k, v) in base['sums'].iteritems()}
        sums['idea'] = idea_names[idea_id]
        dw.writerow(sums)
def main():
    print "Collecting tweets for {track}".format(**query.track)
    tweets = get_twitters(query.twitter_url, parameters=query.track)
    # the filename is set in the query.py settings
    # write headers if the file does not exist
    write_headers = True
    write_opts = 'wb'
    if os.path.isfile(query.filename):
        write_headers = False
        write_opts = 'ab'
    csv_writer = None
    for tweet in tweets:
        tweet = json.loads(tweet)
        # todo: add the csv writer and json to row
        row = flatten_json(tweet)
        # set up the CSV writer if it does not exist yet
        if csv_writer is None:
            csv_writer = DictWriter(open(query.filename, write_opts),
                                    fieldnames=row.keys(),
                                    quoting=QUOTE_MINIMAL)
        # write the header row once, only if the file is new
        if write_headers:
            csv_writer.writeheader()
            write_headers = False
        csv_writer.writerow(row)
def main():
    '''
    >>> main() # stuff happens
    '''
    args = parse_args()
    logging.basicConfig(filename=args.log, level=logging.INFO)
    input_otu_counts = defaultdict(lambda: defaultdict(lambda: 0))
    field_names = set()
    for input in args.inputs:
        with open(input) as handle:
            kraken_data = parse_kraken_file(handle)
            for row in kraken_data:
                field_names.add(row['ncbi_taxid'])
                input_otu_counts[input][row['ncbi_taxid']] += 1
    field_names = ['input'] + sorted([i for i in field_names])
    with open(args.output, 'w') as handle:
        writer = DictWriter(handle, fieldnames=field_names)
        writer.writeheader()
        for input, otu_counts in list(input_otu_counts.items()):
            otu_counts['input'] = input
            writer.writerow(otu_counts)
def main():
    layout = construct_layout(OFF_PROPERTY_LAYOUT)
    header = get_active_header(OFF_PROPERTY_LAYOUT)

    # Prepare CSV output to stdout
    writer = DictWriter(stdout, fieldnames=header)
    writer.writeheader()

    parse = Struct(layout).unpack_from
    struct_length = calcsize(layout)

    for line in get_stdin_bytes().readlines():
        # Ensure string length is what deconstructer expects
        if len(line) != struct_length:
            line = '{:<{}s}'.format(line.decode(), struct_length).encode()

        # Deconstruct fixed-width string
        row = parse(line)

        # Decode each value
        row = (v.decode('ascii', 'ignore') for v in row)

        # Trim whitespace in each field
        row = [field.strip() for field in row]

        # Convert to dict using header
        row = dict(zip(header, row))

        writer.writerow(row)
def writePredictions(self):
    print "In writePredictions"
    o = DictWriter(open("predictions.csv", 'w'), ["id", "position"])
    o.writeheader()
    for ii, pp in zip([x['id'] for x in self.test], self.predictions):
        d = {'id': ii, 'position': pp}
        o.writerow(d)
def test_behavior_strategy(b: Behavior, s: Strategy, size=20):
    TRIALS = 10**2
    results = []
    start = time()
    dynamic = False
    for _ in range(TRIALS):
        r = MemoryManager(s, size, dynamic).handle_string(generate_list(b))
        results.append(r)
    end = time()
    avg_time = (end - start) / TRIALS
    print('Average time: ', avg_time)
    print('Minimum no. page faults: ', min(results))
    print('Maximum no. page faults: ', max(results))
    avg = sum(results) / len(results)
    print('Average no. page faults: ', avg)
    with open('benchmarks.csv', 'r') as record_file:
        data = DictReader(record_file)
        entries = [i for i in data]
    entry_fields = ['Behavior', 'Strategy', 'Res. Set Size', 'Faults']
    new_entry = {'Behavior': b.name, 'Strategy': s.name,
                 'Res. Set Size': size, 'Faults': int(avg)}
    entries.append(new_entry)
    entries = sorted(entries, key=itemgetter('Behavior', 'Strategy'))
    with open('benchmarks.csv', 'w', newline='') as record_file:
        writer = DictWriter(record_file, entry_fields)
        writer.writeheader()
        writer.writerows(entries)
def convert_powertracker_log_to_csv(path):
    """
    This function creates a CSV file (to ./results) from a PowerTracker log file (from ./data).
    This is inspired from https://github.com/sieben/makesense/blob/master/makesense/parser.py.

    :param path: path to the experiment (including [with-|without-malicious])
    """
    platforms = [p.capitalize() for p in get_available_platforms()]
    data, results = join(path, 'data'), join(path, 'results')
    with open(join(data, 'powertracker.log')) as f:
        log = f.read()
    iterables, fields = [], ['mote_id']
    for it in PT_ITEMS:
        time_field = '{}_time'.format(it)
        iterables.append(finditer(PT_REGEX.format('|'.join(platforms), it.upper(), time_field),
                                  log, MULTILINE))
        fields.append(time_field)
    with open(join(results, 'powertracker.csv'), 'w') as f:
        writer = DictWriter(f, delimiter=',', fieldnames=fields)
        writer.writeheader()
        for matches in zip(*iterables):
            row = {}
            for m in matches:
                row.update((k, int(v)) for k, v in m.groupdict().items())
            for it in PT_ITEMS:
                time_field = '{}_time'.format(it)
                row[time_field] = float(row[time_field] / 10 ** 6)
            writer.writerow(row)
def job_result_csv(job_id):
    db_session = db.get_session()
    db_job = db_session.query(PersistentJob).get(job_id)
    if not db_job:
        return json_error('no task exists with id: {0}'.format(job_id))
    celery_task = Job.task.AsyncResult(db_job.result_key)
    if celery_task.ready():
        task_result = celery_task.get()
        csv_io = StringIO()
        if task_result:
            # if task_result is not empty find header in first row
            fieldnames = ['user_id'] + sorted(task_result.values()[0].keys())
        else:
            fieldnames = ['user_id']
        writer = DictWriter(csv_io, fieldnames)
        task_rows = []
        # fold user_id into dict so we can use DictWriter to escape things
        for user_id, row in task_result.iteritems():
            row['user_id'] = user_id
            task_rows.append(row)
        writer.writeheader()
        writer.writerows(task_rows)
        app.logger.debug('celery task is ready! returning actual result:\n%s',
                         csv_io.getvalue())
        return Response(csv_io.getvalue(), mimetype='text/csv')
    else:
        return json_response(status=celery_task.status)
def _stats_data_csv(user_profile, req_input, client, ignored, stats_type, is_custom):
    n_type_keys = {
        'mean': ['start', 'stop', 'service_name', 'mean', 'mean_all_services',
                 'usage_perc_all_services', 'time_perc_all_services',
                 'all_services_usage', 'mean_trend'],
        'usage': ['start', 'stop', 'service_name', 'usage', 'rate',
                  'usage_perc_all_services', 'time_perc_all_services',
                  'all_services_usage', 'usage_trend'],
    }

    buff = StringIO()
    writer = DictWriter(buff, n_type_keys[req_input.n_type], extrasaction='ignore')
    writer.writeheader()

    for stat in _get_stats(client, req_input.utc_start, req_input.utc_stop,
                           req_input.n, req_input.n_type, stats_type):
        d = stat.to_dict()
        d['start'] = req_input.user_start
        d['stop'] = req_input.user_stop if stats_type == 'trends' or is_custom else ''
        writer.writerow(d)

    out = buff.getvalue()
    buff.close()

    response = HttpResponse(out, content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename={}'.format('zato-stats.csv')
    return response
def run():
    output = open(sys.argv[1], 'w')
    writer = DictWriter(output, fieldnames=['uid', 'data'])
    writer.writeheader()
    db = DB(dbconfig)
    for uid in fetch_users(db):
        data = fetch_user_location_logs(uid, db)
        locations = merge_locations(data)
        matrix = generate_matrix(locations)
        semantic_data = fetch_semantic_data(list(matrix.keys()))
        semantic_dict = {}
        for row in semantic_data:
            semantic_dict[row['location']] = clean_tags(row['tags'], 5)
        tag_matrix = {}
        for location, proba in list(matrix.items()):
            tag_dict = semantic_dict[location]
            tag_weight = sum(v for v in list(tag_dict.values()))
            if tag_weight == 0:
                continue
            for tag, cnt in list(tag_dict.items()):
                tag_matrix.setdefault(tag, [0] * 48)
                for i in range(48):
                    tag_matrix[tag][i] += (proba[i] * cnt + 0.001) / (tag_weight + 0.001)
        writer.writerow({
            'uid': uid,
            'data': json.dumps(tag_matrix)
        })
    output.close()
def handle_noargs(self, **options):
    print "in the command..."
    comment_keys = ('user_key', 'g', 'agroup', 'user_key',
                    'experiment_slug', 'variant', 'via')
    petition_headers = comment_keys + ('name', 'email')
    # Petition signatories from the first two experiments
    for filename, url_path in [
        ('petition-1.csv', '/county-performance/petition'),
        ('petition-2.csv', '/county-performance-2/petition'),
    ]:
        with open(filename, "wb") as f:
            writer = DictWriter(f, petition_headers)
            writer.writeheader()
            for f in Feedback.objects.filter(url__endswith=url_path):
                data, comment = unpack_comment(f.comment)
                row_data = data.copy()
                row_data['name'] = comment
                row_data['email'] = f.email
                writer.writerow(row_data)
    senate_headers = comment_keys + ('comment',)
    for filename, url_path in [
        ('senate-1.csv', '/county-performance/senate'),
        ('senate-2.csv', '/county-performance-2/senate'),
    ]:
        with open(filename, "wb") as f:
            writer = DictWriter(f, senate_headers)
            writer.writeheader()
            for f in Feedback.objects.filter(url__endswith=url_path):
                data, comment = unpack_comment(f.comment)
                row_data = data.copy()
                row_data['comment'] = comment
                writer.writerow(row_data)
def process_vf(loc_data):
    precinct_data = {}
    with open(Files.VF_CUT.format(**loc_data), "r") as r, \
            open(Files.VF_DEDUPED.format(**loc_data), "w") as w:
        reader = DictReader(r, dialect='excel-tab')
        writer = DictWriter(w, fieldnames=Headers.VF_DEDUPED)
        writer.writeheader()
        vf_hashes = set()
        p_count = 0
        for row in reader:
            if len(loc_data['county']) > 0 and not row['vf_county_name'].upper() == loc_data['county'].upper():
                continue
            vf_hash = get_hash(row, HashFields.VF)
            if vf_hash in vf_hashes:
                continue
            vf_hashes.add(vf_hash)
            vfp_hash = get_hash(row, HashFields.VFP)
            row_zip = row['vf_reg_cass_zip']
            if vfp_hash not in precinct_data:
                p_count += 1
                precinct_data[vfp_hash] = get_conversion(row, Conversions.VFP)
                precinct_data[vfp_hash]['vf_precinct_id'] = Prefixes.PRECINCT + str(p_count)
                precinct_data[vfp_hash]['zips'] = {row_zip: 1}
                precinct_data[vfp_hash]['examples'] = []
            elif row_zip not in precinct_data[vfp_hash]['zips']:
                precinct_data[vfp_hash]['zips'][row_zip] = 1
            else:
                precinct_data[vfp_hash]['zips'][row_zip] += 1
            vf_output = get_conversion(row, Conversions.VF)
            if len(precinct_data[vfp_hash]['examples']) < 5:
                precinct_data[vfp_hash]['examples'].append(vf_output)
            vf_output["vf_precinct_id"] = precinct_data[vfp_hash]['vf_precinct_id']
            vf_output["vf_id"] = str(Prefixes.VF + row["voterbase_id"][3:])
            writer.writerow(vf_output)
    return precinct_data
def get_vf_precincts(loc_data, precinct_data):
    with open(Files.VF_PRECINCTS.format(**loc_data), "w") as vfp_w, \
            open(Files.VF_EX_PRECINCTS.format(**loc_data), "w") as vfep_w:
        vfp_writer = DictWriter(vfp_w, fieldnames=Headers.VFP)
        vfp_writer.writeheader()
        vfep_writer = DictWriter(vfep_w, fieldnames=Headers.VFEP)
        vfep_writer.writeheader()
        for key, vfp_dict in precinct_data.iteritems():
            zips = vfp_dict.pop('zips')
            max_count = 0
            max_zip = 0
            total_count = 0
            for zip_val, zip_count in zips.iteritems():
                total_count += zip_count
                if zip_count > max_count:
                    max_count = zip_count
                    max_zip = zip_val
            vfp_dict['vf_precinct_zip'] = max_zip
            vfp_dict['vf_precinct_count'] = total_count
            examples = vfp_dict.pop('examples')
            vfp_writer.writerow(vfp_dict)
            ex_count = 1
            for ex in examples:
                for key in Conversions.VF_EX:
                    vfp_dict[Prefixes.VFP_EX.format(ex_count) + key] = ex[key]
                ex_count += 1
            vfep_writer.writerow(vfp_dict)
def output_results(poi_result_set, screen=True, outfile=None):
    """
    Outputs unified DBSCAN results to screen or csv file.

    The screen only shows major data elements. The CSV file has the complete
    dictionary (i.e., base dictionary plus ZOA attributes for each POI).
    """
    assert not isinstance(poi_result_set, basestring), 'POI result set is not list or tuple'
    if screen:
        print "\nZOAs by POI"
        print "=" * 80,
        for poi in poi_result_set:
            print "\nLocation:\t%s" % poi[s.NAME_KEY]
            print "Address:\t%s" % poi[s.ADDR_KEY]
            print "Neighborhood:\t%s" % poi[s.NBHD_KEY]
            print "Coordinates:\t%.4f, %.4f" % (poi[s.LAT_KEY], poi[s.LNG_KEY])
            print "ZOA ID:\t\t%d" % poi[s.ZOA_KEY]
    if outfile:
        assert isinstance(outfile, str), "Outfile name is not a string: %r" % outfile
        if outfile[-4:] != '.csv':
            outfile += '.csv'
        with open(outfile, 'wb') as f:
            target = DictWriter(f, poi_result_set[0].keys())
            target.writeheader()
            target.writerows(poi_result_set)
        print "\nWrote output to %s.\n" % outfile
def plot_file(filename1):
    base_name = os.path.basename(filename1)
    name_parts = base_name.split('_')
    work_path = os.path.dirname(__file__)
    scores_filename = os.path.join(
        work_path, '_'.join(name_parts[:2] + ['v3loop_scores.csv']))
    if os.path.exists(scores_filename):
        with open(scores_filename) as f:
            reader = DictReader(f)
            score_rows = [list(map(int, row))
                          for row in map(itemgetter('score', 'count'), reader)]
    else:
        source1 = os.path.join('micall/tests/working/v3loop_alignment_scores/', filename1)
        source2 = source1.replace('_R1_', '_R2_')
        start = datetime.now()
        with open(source1) as fastq1, open(source2) as fastq2:
            score_counts = align_reads(fastq1, fastq2)
        print('{}: {}'.format(datetime.now() - start, filename1))
        score_rows = sorted(score_counts.items())
        with open(scores_filename, 'w') as scores_csv:
            writer = DictWriter(scores_csv, ('score', 'count'),
                                lineterminator=os.linesep)
            writer.writeheader()
            for score, count in score_rows:
                writer.writerow(dict(score=score, count=count))
    scores = [row[0] for row in score_rows]
    counts = [row[1] for row in score_rows]
    total_count = float(sum(counts))
    fractions = [count / total_count for count in counts]
    plt.plot(scores, fractions, label=base_name.split('_')[0], alpha=0.7)
def main(infile, outfile):
    with open(infile) as inf, open(outfile, "w") as outf:
        r = DictReader(inf)
        rows = [process(row) for row in r]
        w = DictWriter(outf, fieldnames=rows[0].keys())
        w.writeheader()
        w.writerows(rows)
def run(args):
    opts = parse_args(args)
    opts.prefix = opts.prefix or opts.graph.split('.', 1)[0]
    sheets = {}
    sheet_headers = {}
    try:
        with file(opts.graph) as csvfile:
            for row in reader(csvfile):
                fv = dict(column.split('=', 1) for column in row)
                entity_type = fv.pop('Entity Type')
                headers = fv.keys()
                if entity_type not in sheets:
                    sheets[entity_type] = [fv]
                    sheet_headers[entity_type] = set(headers)
                    continue
                else:
                    sheets[entity_type].append(fv)
                if len(headers) > len(sheet_headers[entity_type]):
                    sheet_headers[entity_type].union(headers)
        for entity_type in sheets:
            with open('%s_%s.csv' % (opts.prefix, entity_type), 'wb') as csvfile:
                csv = DictWriter(csvfile, sheet_headers[entity_type])
                csv.writeheader()
                csv.writerows(sheets[entity_type])
    except IOError, e:
        print 'csv2sheets: %s' % e
        exit(-1)
def run(out_file, input_dirs):
    assert input_dirs
    files = []
    for input_dir in input_dirs:
        for filename in listdir(input_dir):
            if filename.endswith(".log"):
                files.append(join(input_dir, filename))
    assert files
    raw_data = sorted((get_data(filename) for filename in files), key=data_key)
    aggregated_data = (
        indy_vars + aggregate_data(group)
        for (_key, indy_vars), group
        in groupby(raw_data, partial(data_key, include_id=False))
    )
    with open(out_file, "w") as f:
        out = DictWriter(f, independent_vars + dependent_vars)
        out.writeheader()
        out.writerows(raw_data)
        del out
    with open("-aggregate".join(splitext(out_file)), "w") as f:
        out = writer(f)
        out.writerow(independent_vars[:-1] + ("count",) + dependent_vars)
        out.writerows(aggregated_data)
def main():
    # We open the 2000 file first because it has the headers
    print("Reading from:", SRC_PATHS['2000'])
    csv2000 = DictReader(SRC_PATHS['2000'].read_text().splitlines())
    # awkward but whatever. We need to use csv2000's headers
    # and add the 'year' column to it
    destfile = DEST_PATH.open('w')
    destcsv = DictWriter(destfile, fieldnames=['year'] + csv2000.fieldnames)
    destcsv.writeheader()
    for i, row in enumerate(csv2000):
        row['year'] = 2000
        destcsv.writerow(row)
    print("Wrote {0} lines to: {1}".format(i + 1, DEST_PATH))

    # now we open 1990 file and iterate
    print("Reading from:", SRC_PATHS['1990'])
    for i, line in enumerate(SRC_PATHS['1990'].read_text().splitlines()):
        name, freq, cumfreq, rank = re.search(RX_ROW_1990, line).groups()
        row = {
            'name': name.strip(),
            'rank': int(rank),
            'year': 1990,
            'prop100k': int(float(freq) * 1000),
            'cum_prop100k': int(float(cumfreq) * 1000),
        }
        destcsv.writerow(row)
    print("Wrote {0} lines to: {1}".format(i + 1, DEST_PATH))

    # all done
    destfile.close()
def main():
    search_par_h = open("data/search_params.csv", "w")
    writer = DictWriter(search_par_h, fieldnames=["SearchID", "SearchParams"])
    writer.writeheader()
    for t, row in read_tsv("data/SearchInfo.tsv"):
        sparams = row["SearchParams"]
        if not sparams:
            continue
        sid = int(row["SearchID"])
        sparams = re.sub(r"([A-Za-z0-9]+):", r'"\1":', sparams)
        sparams = sparams.replace("'", "\"")
        sparams = sparams.replace("Минивэн\",", "\"Минивэн\",")
        sparams = sparams.replace("Микроавтобус\"]", "\"Микроавтобус\"]")
        sparams = unicode(sparams, "utf-8")
        try:
            sparams = json.loads(sparams)
            for k, v in sparams.items():
                t = type(v)
                if t not in type_set:
                    print t, k, v
                    type_set.add(t)
            sparams_str = json.dumps(sparams)
            writer.writerow({"SearchID": sid, "SearchParams": sparams_str})
        except Exception as e:
            print e
            print sparams
def customer_stats(outfile=None):
    sales = sales_grouped_by_users()
    stats = {}
    for user_id, items in sales:
        item_list = list(items)
        data = {}
        data['user_id'] = user_id
        data['n_lines'] = len(item_list)
        # all orders
        fill_items(data, item_list, suffix='')
        # online orders
        item_list_online = [i for i in item_list if i['online_order_number']]
        fill_items(data, item_list_online, suffix='_online')
        # sale items
        item_list_on_sale = [i for i in item_list if i['on_sale'] == 't']
        fill_items(data, item_list_on_sale, suffix='_on_sale')
        stats[user_id] = data
    if outfile is not None:
        fieldnames = sorted(data.keys())
        dw = DictWriter(open(outfile, 'w'), fieldnames=fieldnames)
        dw.writeheader()
        for user_id, row in stats.iteritems():
            dw.writerow(row)
    return stats.values()
def write_csv(output_file, address_dicts):
    geocoded_file = open(output_file, 'wb')
    writer = DictWriter(geocoded_file, fieldnames=address_dicts[0].keys(),
                        dialect='excel', lineterminator='\n')
    writer.writeheader()
    writer.writerows(address_dicts)
    geocoded_file.close()
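# Hedged usage sketch (not part of the original source): write_csv() only needs a
# list of dicts that share the same keys; the header comes from the first dict.
# The file name and field values below are made up for illustration.
example_addresses = [
    {'address': '123 Main St', 'lat': '40.0', 'lng': '-75.0'},
    {'address': '456 Oak Ave', 'lat': '41.0', 'lng': '-76.0'},
]
write_csv('geocoded_output.csv', example_addresses)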
def write_output():
    with open(
        "urls-used-for-local-transactions-with-statuses-and-jumbled-urls-and-pageviews.csv",
        "w", encoding="utf8"
    ) as output:
        writer = DictWriter(output, fields)
        writer.writeheader()
        writer.writerows(urls_used_with_pageviews)
def write_data(outfile, data, fields):
    with open(outfile, 'w') as outfile:
        writer = DictWriter(outfile, fieldnames=fields)
        writer.writeheader()
        for d in data:
            writer.writerow(d)
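# Hedged usage sketch (not part of the original source): fields controls both the
# header row and the column order. Assumes DictWriter is imported from csv at
# module level, as the function above expects; file name and rows are made up.
write_data('scores.csv',
           [{'id': 1, 'score': 0.9}, {'id': 2, 'score': 0.4}],
           fields=['id', 'score'])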
def writePredictions(self):
    print "In writePredictions"
    o = DictWriter(open("predictions.csv", "w"), ["id", "position"])
    o.writeheader()
    for ii, pp in zip([x["id"] for x in self.test], self.predictions):
        d = {"id": ii, "position": pp}
        o.writerow(d)
def main(): parser = ArgumentParser() parser.add_argument('--csvfile', '-c', default=None, metavar='F', help='csv file containing trybooking report') parser.add_argument('--verbose', '-v', action='store_true', help='print verbose messages') args = parser.parse_args() csvinput = args.csvfile if csvinput is None: csvinput, _ = latest_report( None, rdir=develdir, nre=r'^(\d{8}).csv$', n2dt=lambda m: datetime.strptime(m.group(1), '%d%m%Y'), verbose=args.verbose ) if csvinput is None: raise RuntimeError('no trybooking report found!') if args.verbose: print( '[trybooking report selected: {}]'.format(csvinput), file=sys.stderr ) with open(csvinput, 'r', newline='') as infile: _ = infile.read(1) reader = DictReader(infile) orecs = [] for inrec in reader: if inrec['Void'] == 'Yes': if args.verbose: print( 'ignore VOID record: {}'.format(inrec), file=sys.stderr ) continue name = inrec['Ticket Data: Player\'s First Name'] + ' ' + \ inrec['Ticket Data: Player\'s Surname'] date_of_birth = inrec['Ticket Data: Player\'s Date-of-Birth'] paid = inrec['Net Booking'] medical = inrec[ 'Ticket Data: Special Requirements/Medical Conditions' ] isparent = ( inrec['Ticket Data: Purchaser is Player\'s Parent/Guardian'] == 'Yes' ) if isparent: parent = inrec['Booking First Name'] + ' ' + \ inrec['Booking Last Name'] address = make_address( inrec['Booking Address 1'], inrec['Booking Address 2'], inrec['Booking Suburb'], inrec['Booking Post Code'], ) phone = inrec['Booking Telephone'] email = inrec['Booking Email'] else: parent = inrec['Ticket Data: Parent/Guardian Name'] address = inrec['Ticket Data: Parent/Guardian Address'] phone = inrec['Ticket Data: Parent/Guardian Phone'] email = inrec['Ticket Data: Parent/Guardian Email'] orecs.append( dict( paid=paid, name=name, date_of_birth=date_of_birth, parent=parent, email=email, phone=make_phone(phone), address=address.title(), medical=medical, ) ) if len(orecs) == 0: print('No CSV records in "{}"'.format(csvinput)) sys.exit(0) with TextIOWrapper(sys.stdout.buffer, newline='') as outfile: writer = DictWriter(outfile, fieldnames=orecs[0].keys()) writer.writeheader() for outrec in orecs: writer.writerow(outrec) return 0
def trim5p3p(records, output_prefix): f_FL = open(output_prefix + '.fl.fasta', 'w') f_FL_clips = open(output_prefix + '.fl.clips', 'w') f_nFL = open(output_prefix + '.nfl.fasta', 'w') f_csv = open(output_prefix + '.csv', 'w') writer = DictWriter(f_csv, fieldnames=['id', 'end5', 'end3', 'endA', 'strand']) writer.writeheader() for r in records: r2 = r.reverse_complement() r2.id = r.id t1 = trim5p3p_helper(r) t2 = trim5p3p_helper(r2) is_fl_flag1 = t1.score5 >= MINSCORE_5P and t1.score3 >= MINSCORE_3P and ( MIN_A_LEN == 0 or t1.endA != t1.end3) is_fl_flag2 = t2.score5 >= MINSCORE_5P and t2.score3 >= MINSCORE_3P and ( MIN_A_LEN == 0 or t2.endA != t2.end3) if is_fl_flag1: if is_fl_flag2: if t1.score5 + t1.score3 > t2.score5 + t2.score3: strand = '+' else: strand = '-' else: # pick t1 strand = '+' elif is_fl_flag2: strand = '-' else: strand = 'NA' info = { 'id': r.id, 'end5': 'NA', 'end3': 'NA', 'endA': 'NA', 'strand': 'NA' } if strand == '+': info['strand'] = '+' info['end5'] = t1.end5 info['end3'] = t1.end3 info['endA'] = t1.endA f_FL.write(">{0}\n{1}\n".format(r.id, r.seq[t1.end5:t1.endA])) f_FL_clips.write(">{0}_5p strand:+ score:{1}\n{2}\n".format( r.id, t1.score5, r.seq[:t1.end5])) f_FL_clips.write(">{0}_3p strand:+ score:{1}\n{2}\n".format( r.id, t1.score3, r.seq[t1.endA:])) elif strand == '-': info['strand'] = '-' info['end5'] = t2.end5 info['end3'] = t2.end3 info['endA'] = t2.endA f_FL.write(">{0}\n{1}\n".format(r2.id, r2.seq[t2.end5:t2.endA])) f_FL_clips.write(">{0}_5p strand:- score:{1}\n{2}\n".format( r.id, t2.score5, r2.seq[:t2.end5])) f_FL_clips.write(">{0}_3p strand:- score:{1}\n{2}\n".format( r.id, t2.score3, r2.seq[t2.endA:])) else: # non-fL, but we still wanna trim away the stuff if t1.score5 + t1.score3 > t2.score5 + t2.score3: f_nFL.write(">{0} strand:+?\n{1}\n".format( r.id, r.seq[t1.end5:t1.endA])) else: f_nFL.write(">{0} strand:-?\n{1}\n".format( r2.id, r2.seq[t2.end5:t2.endA])) writer.writerow(info) f_csv.close() f_FL.close() f_FL_clips.close() f_nFL.close()
def ensure_history_file_exists():
    """Make sure the request-history file exists."""
    if not exists(HISTORY_PATH):
        with open(HISTORY_PATH, 'a+') as tfile:
            writer = DictWriter(tfile, CSV_FIELDS)
            writer.writeheader()
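# Hedged sketch (not part of the original source): a hypothetical helper showing
# how a request might be appended once the header is guaranteed to exist.
# HISTORY_PATH and CSV_FIELDS are the module-level constants the function above
# already assumes; append_history_row itself is illustrative only.
def append_history_row(row):
    ensure_history_file_exists()
    with open(HISTORY_PATH, 'a', newline='') as tfile:
        DictWriter(tfile, CSV_FIELDS).writerow(row)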
with open(my_file) as csvfile:
    records = []
    waitlist = DictReader(csvfile)
    for row in waitlist:
        records.append(row)

column_headers = records[0].keys()
input = input('Enter the column header you would like to split: \n')
if input not in column_headers:
    print("Input supplied not in column headings.... exiting.")
    sys.exit(1)

for record in records:
    target = record[input]
    split_names = target.split(' ')
    del record[input]
    record['first %s' % input] = split_names[0]
    record['last %s' % input] = ''
    if len(split_names) > 1:
        record['last %s' % input] = ' '.join(split_names[1:])

output_filename = 'outfiles/waitlist-%s.csv' % (''.join(
    choice(ascii_lowercase) for i in range(4)))
with open(output_filename, 'w') as outfile:
    writer = DictWriter(outfile, records[0].keys())
    writer.writeheader()
    writer.writerows(records)
def evaluate_alignment_sam(input_fa_or_fq, sam_filename, genome_d, output_prefix, junction_info=None): h1 = open(output_prefix + '.alignment_report.txt', 'w') h2 = open(output_prefix + '.junction_report.txt', 'w') w1 = DictWriter(h1, fieldnames=fieldnames_report1) w2 = DictWriter(h2, fieldnames=fieldnames_report2) w1.writeheader() w2.writeheader() #fieldnames_report1 = ['seqid', 'coverage', 'identity', 'num_sub', 'num_ins', 'num_del', 'num_exons'] #fieldnames_report2 = ['seqid', 'donor_pos', 'donor_seq', 'donor_dist', 'acceptor_pos', 'acceptor_seq', 'acceptor_dist'] query_len_dict = dict((r.id, len(r.seq)) for r in SeqIO.parse( open(input_fa_or_fq), type_fa_or_fq(input_fa_or_fq))) for r in GMAPSAMReader(sam_filename, True, query_len_dict=query_len_dict): if r.sID == '*': # unaligned rec1 = { 'seqid': r.qID, 'coverage': 'NA', 'identity': 'NA', 'num_sub': 'NA', 'num_ins': 'NA', 'num_del': 'NA', 'num_exons': 'NA' } w1.writerow(rec1) continue rec1 = { 'seqid': r.qID, 'coverage': r.qCoverage, 'identity': r.identity, 'num_sub': r.num_nonmatches - r.num_del - r.num_ins, 'num_ins': r.num_ins, 'num_del': r.num_del, 'num_exons': len(r.segments) } w1.writerow(rec1) for i in range(0, len(r.segments) - 1): rec2 = {'seqid': r.qID} seq1, seq2 = get_donor_acceptor(genome_d, r.sID, r.flag.strand, r.segments[i].end - 1, r.segments[i + 1].start) if r.flag.strand == '+': rec2['donor_pos'] = "{0}:+:{1}".format(r.sID, r.segments[i].end - 1) rec2['acceptor_pos'] = "{0}:+:{1}".format( r.sID, r.segments[i + 1].start) else: rec2['donor_pos'] = "{0}:-:{1}".format(r.sID, r.segments[i + 1].start) rec2['acceptor_pos'] = "{0}:-:{1}".format( r.sID, r.segments[i].end - 1) rec2['donor_seq'] = seq1 rec2['acceptor_seq'] = seq2 if junction_info is not None: rec2['donor_dist'], rec2[ 'acceptor_dist'] = get_closest_junction_dist( junction_info, r.sID, r.flag.strand, r.segments[i].end - 1, r.segments[i + 1].start) else: rec2['donor_dist'] = 'NA' rec2['acceptor_dist'] = 'NA' w2.writerow(rec2)
def run_optimization(atoms: Atoms, dihedrals: List[DihedralInfo], n_steps: int, calc: Calculator, init_steps: int, out_dir: Optional[Path], relax: bool = True) -> Atoms: """Optimize the structure of a molecule by iteratively changing the dihedral angles Args: atoms: Atoms object with the initial geometry dihedrals: List of dihedral angles to modify n_steps: Number of optimization steps to perform init_steps: Number of initial guesses to evaluate calc: Calculator to pick the energy out_dir: Output path for logging information relax: Whether to relax non-dihedral degrees of freedom each step Returns: (Atoms) optimized geometry """ # Perform an initial relaxation _, init_atoms = relax_structure(atoms, calc) if out_dir is not None: with open(out_dir.joinpath('relaxed.xyz'), 'w') as fp: simple_write_xyz(fp, [init_atoms]) # Evaluate initial point start_coords = np.array([d.get_angle(init_atoms) for d in dihedrals]) start_energy, start_atoms = evaluate_energy(start_coords, atoms, dihedrals, calc, relax) logger.info(f'Computed initial energy: {start_energy}') # Begin a structure log, if output available if out_dir is not None: log_path = out_dir.joinpath('structures.csv') ens_path = out_dir.joinpath('ensemble.xyz') with log_path.open('w') as fp: writer = DictWriter(fp, ['time', 'xyz', 'energy', 'ediff']) writer.writeheader() def add_entry(coords, atoms, energy): with log_path.open('a') as fp: writer = DictWriter( fp, ['time', 'coords', 'xyz', 'energy', 'ediff']) xyz = StringIO() simple_write_xyz(xyz, [atoms]) writer.writerow({ 'time': datetime.now().timestamp(), 'coords': coords.tolist(), 'xyz': xyz.getvalue(), 'energy': energy, 'ediff': energy - start_energy }) with ens_path.open('a') as fp: simple_write_xyz(fp, [atoms], comment=f'\t{energy}') add_entry(start_coords, start_atoms, start_energy) # Make some initial guesses init_guesses = np.random.normal(start_coords, 30, size=(init_steps, len(dihedrals))) init_energies = [] for i, guess in enumerate(init_guesses): energy, cur_atoms = evaluate_energy(guess, start_atoms, dihedrals, calc, relax) init_energies.append(energy - start_energy) logger.info( f'Evaluated initial guess {i+1}/{init_steps}. Energy-E0: {energy-start_energy}' ) if out_dir is not None: add_entry(guess, cur_atoms, energy) # Save the initial guesses observed_coords = [start_coords, *init_guesses.tolist()] observed_energies = [0.] + init_energies # Loop over many steps cur_atoms = start_atoms.copy() for step in range(n_steps): # Make a new search space best_coords = select_next_points_botorch(observed_coords, observed_energies) # Compute the energies of those points energy, cur_atoms = evaluate_energy(best_coords, cur_atoms, dihedrals, calc, relax) logger.info( f'Evaluated energy in step {step+1}/{n_steps}. Energy-E0: {energy-start_energy}' ) if energy - start_energy < np.min( observed_energies) and out_dir is not None: with open(out_dir.joinpath('current_best.xyz'), 'w') as fp: simple_write_xyz(fp, [cur_atoms]) # Update the log if out_dir is not None: add_entry(start_coords, cur_atoms, energy) # Update the search space observed_coords.append(best_coords) observed_energies.append(energy - start_energy) # Final relaxations best_atoms = cur_atoms.copy() best_coords = observed_coords[np.argmin(observed_energies)] best_energy, best_atoms = evaluate_energy(best_coords, best_atoms, dihedrals, calc) logger.info('Performed final relaxation with dihedral constraints.' f'E: {best_energy}. 
E-E0: {best_energy - start_energy}') if out_dir is not None: add_entry(np.array(best_coords), best_atoms, best_energy) # Relaxations best_atoms.set_constraint() best_energy, best_atoms = relax_structure(best_atoms, calc) logger.info('Performed final relaxation without dihedral constraints.' f' E: {best_energy}. E-E0: {best_energy - start_energy}') best_coords = np.array([d.get_angle(best_atoms) for d in dihedrals]) if out_dir is not None: add_entry(best_coords, best_atoms, best_energy) return best_atoms
from csv import DictReader, DictWriter


def cm_to_in(cm):
    return round(float(cm) * 0.393701, 2)


with open("Datasets/fighters.csv") as file:
    csv_reader = DictReader(file)
    fighters = list(csv_reader)

with open("Datasets/inches_fighters.csv", "w") as file:
    headers = ("Name", "Country", "Height")
    csv_writer = DictWriter(file, fieldnames=headers, lineterminator='\n')
    csv_writer.writeheader()
    for fighter in fighters:
        csv_writer.writerow({
            "Name": fighter["Name"],
            "Country": fighter["Country"],
            "Height": cm_to_in(fighter["Height (in cm)"])
        })
        if cols == HEADERS:
            headers_found = True
        else:
            # headers have been found
            # don't capture anything unless
            # nearly all cells are filled...
            # if not all(c == '' for c in cols) and len(HEADERS) - len(cols) == 0:
            if 'Employee' in cols[1] or '$' in cols[2]:
                yield dict(zip(HEADERS, cols))


if __name__ == '__main__':
    parser = ArgumentParser("Convert WH salary XLS page(s) to CSV")
    parser.add_argument('inpath', type=str,
                        help="Path to a XLSX file, or directory of them")
    args = parser.parse_args()
    inpath = args.inpath
    if isdir(inpath):
        filenames = glob(joinpath(inpath, '*.xls?'))
    else:
        filenames = [inpath]

    # set up the CSV
    csvout = DictWriter(stdout, fieldnames=HEADERS)
    csvout.writeheader()
    for fname in filenames:
        for d in process_wh_salary_workbook(fname):
            csvout.writerow(d)
)
global_stats = Counter()
global_stats_headers = set()
for v in stats.values():
    global_stats_headers |= set(v.keys())
fieldnames = ["notice_id"] + sorted(global_stats_headers)
with open(os.path.join(outdir, "__detailed_stats.csv"), "w") as f_detailed, open(
        os.path.join(outdir, "__global_stats.json"), "w") as f_global:
    w = DictWriter(f_detailed, fieldnames=fieldnames)
    w.writeheader()
    for k in natsorted(stats.keys()):
        global_stats.update(stats[k])
        row = {"notice_id": k}
        row.update(stats[k])
        w.writerow(row)
    json.dump(global_stats, f_global, indent=4, sort_keys=True)
with open(os.path.join(outdir, "__detailed_stats.csv"), "r") as f_in, open(
        os.path.join(outdir, "__detailed_stats.txt"), "w") as f_out:
    f_out.write(prettytable.from_csv(f_in).get_string())
def gaVRPTW(pop, instName, unitCost, waitCost, delayCost, speed, indSize, popSize, cxPb, mutPb, NGen, exportCSV=False, customizeData=False): if customizeData: jsonDataDir = os.path.join('C:\Users\s.janischka\PycharmProjects\py-ga-VRPTW\data', 'json_customize') else: jsonDataDir = os.path.join('C:\Users\s.janischka\PycharmProjects\py-ga-VRPTW\data', 'json') jsonFile = os.path.join(jsonDataDir, '%s.json' % instName) with open(jsonFile) as f: instance = load(f) # Operator registering toolbox.register('evaluate', core.evalVRPTW, instance=instance, unitCost=unitCost, waitCost=waitCost, delayCost=delayCost, speed=speed) toolbox.register('select', tools.selRoulette) toolbox.register('mate', core.cxPartialyMatched) toolbox.register('mutate', core.mutInverseIndexes) pop=pop # Results holders for exporting results to CSV file csvData = [] print 'Start of evolution' # Evaluate the entire population fitnesses = list(toolbox.map(toolbox.evaluate, pop)) for ind, fit in zip(pop, fitnesses): ind.fitness.values = fit # Debug, suppress print() # print ' Evaluated %d individuals' % len(pop) # Begin the evolution for g in range(NGen): print '-- Generation %d --' % g # Select the next generation individuals # Select elite - the best offpsring, keep this past crossover/mutate elite = tools.selBest(pop, 1) # Select top 10% of all offspring # Roulette select the rest 90% of offsprings offspring = tools.selBest(pop, int(numpy.ceil(len(pop)*0.1))) offspringRoulette = toolbox.select(pop, int(numpy.floor(len(pop)*0.9))-1) offspring.extend(offspringRoulette) # Clone the selected individuals offspring = list(toolbox.map(toolbox.clone, offspring)) # Apply crossover and mutation on the offspring for child1, child2 in zip(offspring[::2], offspring[1::2]): if random.random() < cxPb: toolbox.mate(child1, child2) del child1.fitness.values del child2.fitness.values for mutant in offspring: if random.random() < mutPb: toolbox.mutate(mutant) del mutant.fitness.values # Evaluate the individuals with an invalid fitness invalidInd = [ind for ind in offspring if not ind.fitness.valid] fitnesses = toolbox.map(toolbox.evaluate, invalidInd) for ind, fit in zip(invalidInd, fitnesses): ind.fitness.values = fit # Debug, suppress print() # print ' Evaluated %d individuals' % len(invalidInd) # The population is entirely replaced by the offspring # Debug, printing offspring offspring.extend(elite) pop[:] = offspring # Gather all the fitnesses in one list and print the stats fits = [ind.fitness.values[0] for ind in pop] length = len(pop) mean = sum(fits) / length sum2 = sum(x*x for x in fits) std = abs(sum2 / length - mean**2)**0.5 # Debug, suppress print() # print ' Min %s' % min(fits) # print ' Max %s' % max(fits) # print ' Avg %s' % mean # print ' Std %s' % std # Write data to holders for exporting results to CSV file if exportCSV: csvRow = { 'generation': g, 'evaluated_individuals': len(invalidInd), 'min_fitness': min(fits), 'max_fitness': max(fits), 'avg_fitness': mean, 'std_fitness': std, 'avg_cost': 1 / mean, } csvData.append(csvRow) print '-- End of (successful) evolution --' bestInd = tools.selBest(pop, 1)[0] print 'Best individual: %s' % bestInd print 'Fitness: %s' % bestInd.fitness.values[0] core.printRoute(core.ind2route(bestInd, instance, speed)) print 'Total cost: %s' % (1 / bestInd.fitness.values[0]) if exportCSV: csvFilename = '%s_uC%s_wC%s_dC%s_iS%s_pS%s_cP%s_mP%s_nG%s.csv' % (instName, unitCost, waitCost, delayCost, indSize, popSize, cxPb, mutPb, NGen) csvPathname = os.path.join('results', csvFilename) print 'Write to file: 
%s' % csvPathname utils.makeDirsForFile(pathname=csvPathname) if not utils.exist(pathname=csvPathname, overwrite=True): with open(csvPathname, 'w') as f: fieldnames = ['generation', 'evaluated_individuals', 'min_fitness', 'max_fitness', 'avg_fitness', 'std_fitness', 'avg_cost'] writer = DictWriter(f, fieldnames=fieldnames, dialect='excel') writer.writeheader() for csvRow in csvData: writer.writerow(csvRow) return core.ind2route(bestInd, instance, speed)
def demux_by_barcode(samples, fps, dir_out, path_counts, path_details, dry_run=False): """Demultiplex one trio of files from dictionaries of sample and barcode data. samples: dictionary of sample attributes for this run, with BarcodeFwdSeq and BarcodeRevSeq defined fps: dict of "I1", "R1", and "R2" keys pointing to file paths to fastq.gz inputs dir_out: output directory to write demultiplexed fastq.gz files to path_counts: path to csv to write per-sample read counts to. If empty this file isn't written. path_details: csv.gz file to write detailed per-read information. If empty, this file isn't written. dry_run: If True, don't actually call any commands or write any files. """ counts = defaultdict(int) # nested dictionary of sample name -> trios of I1/R1/R2 paths # NOTE # with too many samples at once, this will cause an OS error due to too # many open files. In that case we'd have to open/close as needed. It's # easy here to just open a bunch and store handles in a dictionary, though. fp_outs = { s: {rdid: Path(dir_out) / f"{s}.{rdid}.fastq.gz" for rdid in READS} for s in samples } fp_outs["None"] = { rdid: Path(dir_out) / f"unassigned.{rdid}.fastq.gz" for rdid in READS } for samp in fp_outs: LOGGER.info("output I1 for %s: %s", samp, fp_outs[samp]["I1"]) # lookup table between pairs of barcodes and sample names bc_map = {(v["BarcodeFwdSeq"], v["BarcodeRevSeq"]): k for k, v in samples.items()} if not dry_run: Path(dir_out).mkdir(parents=True, exist_ok=True) try: f_outs = { s: {rdid: GZIP_OPEN(fp_outs[s][rdid], "wt") for rdid in READS} for s in fp_outs } details_writer = None if path_details: Path(path_details).parent.mkdir(parents=True, exist_ok=True) f_details = GZIP_OPEN(path_details, "wt") details_writer = DictWriter(f_details, fieldnames=[ "SeqID", "BarcodeFwdSeq", "BarcodeRevSeq", "BarcodeFwdQualMin", "BarcodeRevQualMin" ], lineterminator="\n") details_writer.writeheader() with GZIP_OPEN(fps["I1"], "rt") as f_i1, \ GZIP_OPEN(fps["R1"], "rt") as f_r1, \ GZIP_OPEN(fps["R2"], "rt") as f_r2: for trio in zip( # each of these is a tuple of (seqid, seq, qual) text FastqGeneralIterator(f_i1), FastqGeneralIterator(f_r1), FastqGeneralIterator(f_r2)): trio = list(trio) trio.extend([ assign_barcode_fwd(trio[1][1], BARCODES_FWD), assign_barcode_rev(trio[0][1], BARCODES_REV) ]) _write_chunk([trio], bc_map, f_outs, counts, details_writer) finally: for trio in f_outs.values(): for f_rd in trio.values(): f_rd.close() if path_details: f_details.close() if path_counts: _write_counts(path_counts, counts)
def get_citation_prov_csv(self):
    s_res = StringIO()
    writer = DictWriter(s_res, Citation.header_provenance_data)
    writer.writeheader()
    writer.writerow(loads(self.get_citation_prov_json()))
    return s_res.getvalue()
def test_urls(self): headers = "in_url class url resource_url resource_file target_file scheme proto resource_format target_format " \ "is_archive encoding target_segment".split() import tempfile tf = tempfile.NamedTemporaryFile(prefix="rowgen", delete=False) temp_name = tf.name tf.close() # S3 URLS have these fields which need to be removed before writing to CSV files. def clean(do): for f in ['_orig_url', '_key', '_orig_kwargs', '_bucket_name']: try: del do[f] except KeyError: pass with open(data_path('url_classes.csv')) as f, open(temp_name, 'w') as f_out: w = None r = DictReader(f) errors = 0 for i, d in enumerate(r): url = d['in_url'] o = Url(url) do = dict(o.__dict__.items()) del do['parts'] if w is None: w = DictWriter(f_out, fieldnames=headers) w.writeheader() do['in_url'] = url do['is_archive'] = o.is_archive do['class'] = o.__class__.__name__ clean(do) w.writerow(do) d = {k: v if v else None for k, v in d.items()} do = {k: str(v) if v else None for k, v in do.items()} # str() turns True into 'True' # a is the gague data from url_classes.csv # b is the test object. try: # A, B self.compare_dict(url, d, do) except AssertionError as e: errors += 1 print(e) # raise self.assertEqual(0, errors) with open(data_path('url_classes.csv')) as f: r = DictReader(f) for i, d in enumerate(r): u1 = Url(d['in_url']) with open(data_path('url_classes.csv')) as f: r = DictReader(f) for i, d in enumerate(r): u1 = Url(d['in_url']) d1 = u1.__dict__.copy() d2 = deepcopy(u1).__dict__.copy() # The parts will be different Bunch objects clean(d1) clean(d2) del d1['parts'] del d2['parts'] self.assertEqual(d1, d2) self.assertEqual(d1, u1.dict) for us in ("http://example.com/foo.zip", "http://example.com/foo.zip#a;b"): u = Url(us, encoding='utf-8') u2 = u.update(target_file='bingo.xls', target_segment='1') self.assertEqual('utf-8', u2.dict['encoding']) self.assertEqual('bingo.xls', u2.dict['target_file']) self.assertEqual('1', u2.dict['target_segment'])
def filter_by_count(input_prefix, output_prefix, min_count, dun_use_group_count=False): group_filename = input_prefix + '.group.txt' count_filename = input_prefix + '.abundance.txt' gff_filename = input_prefix + '.gff' rep_filename = input_prefix + '.rep.fq' if not dun_use_group_count: # read group group_max_count_fl = {} group_max_count_p = {} f = open(group_filename) for line in f: #ex: PB.1.1 i0HQ_54b0ca|c58773/f30p16/700 pbid, members = line.strip().split('\t') group_max_count_fl[pbid] = 0 group_max_count_p[pbid] = 0 members = members.split(',') for m in members: i = m.find('|') if i > 0: tmp = m.split('|')[1].split('/')[1] #ex: tmp = f30p16 else: tmp = m.split('/')[1] fl_count, p_count = tmp.split('p') fl_count = int(fl_count[1:]) p_count = int(p_count) group_max_count_fl[pbid] = max(group_max_count_fl[pbid], fl_count) group_max_count_p[pbid] = max(group_max_count_p[pbid], p_count) f.close() # read abundance first f = open(count_filename) count_header = '' while True: cur_pos = f.tell() line = f.readline() if not line.startswith('#'): f.seek(cur_pos) break else: count_header += line d = dict((r['pbid'], r) for r in DictReader(f, delimiter='\t')) for k, v in d.iteritems(): print k, v f.close() # group_max_count_p NOT used for now good = filter( lambda x: int(d[x]['count_fl']) >= min_count and (dun_use_group_count or group_max_count_fl[x] >= min_count), d) # write output GFF f = open(output_prefix + '.gff', 'w') for r in GFF.collapseGFFReader(gff_filename): if r.seqid in good: GFF.write_collapseGFF_format(f, r) f.close() # write output rep.fq f = open(output_prefix + '.rep.fq', 'w') for r in SeqIO.parse(open(rep_filename), 'fastq'): if r.name.split('|')[0] in good: SeqIO.write(r, f, 'fastq') f.close() # write output to .abundance.txt f = open(output_prefix + '.abundance.txt', 'w') f.write(count_header) writer = DictWriter(f, fieldnames=['pbid','count_fl','count_nfl','count_nfl_amb','norm_fl','norm_nfl','norm_nfl_amb'], \ delimiter='\t', lineterminator='\n') writer.writeheader() for k in good: r = d[k] writer.writerow(r) f.close() print >> sys.stderr, "Output written to:", output_prefix + '.gff' print >> sys.stderr, "Output written to:", output_prefix + '.rep.fq' print >> sys.stderr, "Output written to:", output_prefix + '.abundance.txt'
ANAND_index = 0
right = 0
total = len(dev_test)
for ii in dev_test:
    prediction = classifier.classify(ii[0])
    if prediction == ii[1]:
        right += 1
    else:
        ANAND_writer.writerow(AP_TRAIN_LIST[ANAND_index] + [prediction])
    ANAND_index += 1
sys.stderr.write("Accuracy on dev: %f\n" % (float(right) / float(total)))

if testfile is None:
    sys.stderr.write("No test file passed; stopping.\n")
else:
    # Retrain on all data
    classifier = nltk.classify.NaiveBayesClassifier.train(dev_train + dev_test)

    # Read in test section
    test = {}
    for ii in DictReader(testfile, delimiter='\t'):
        test[ii['id']] = classifier.classify(fe.features(ii['text']))

    # Write predictions
    o = DictWriter(outfile, ['id', 'pred'])
    o.writeheader()
    for ii in sorted(test):
        o.writerow({'id': ii, 'pred': test[ii]})
def submit(req): #limpando lista lista_all.clear() #objeto para gerenciar arquivos de imagem fs = FileSystemStorage() """ Verifica se a pasta 'media' existe, caso ela exista é excluída (pois irá possuir imagens de submissões anteriores.), caso ela já tenha sido excluída é então recriada mais uma vez. """ if(fs.exists(fs.location)): shutil.rmtree(fs.location) else: os.mkdir(fs.location) """ Irá verificar se foi passado na requisição um arquivo de imagem e se o method HTTP passado é do tipo POST """ if req.method == 'POST' and req.FILES['myFile']: myfile = req.FILES['myFile']#Põe a imagem em um objeto. """ O arquivo é salvo no método abaixo que retorna seu caminho para a variável filename. """ filename = fs.save(myfile.name, myfile)#retorna o nome do arquivo.extensão. uploaded_file_url = fs.url(filename)#retorna o caminho completo do nome de arquivo passado. module_dir = os.path.dirname(__file__)#pega o diretório atual do arquivo. file_path = os.path.join(module_dir, "yolofiles/yoloDados/") #objeto para gerenciar arquivos de imagem fs = FileSystemStorage() img_path = os.path.join(fs.location, filename) image = cv2.imread(img_path) #variaveis de captura h, w = None, None #carrega os arquivos com o nome dos objetos que foi treinado para identificar with open(f"{file_path}YoloNames.names") as f: #cria uma lista com todos os nomes labels = [line.strip() for line in f] #carrega arquivos treinados pelo framework network = cv2.dnn.readNetFromDarknet(f"{file_path}yolov3.cfg", f"{file_path}yolov3.weights") #captura ua lista com todos os nomes dos objetos treinados pelo framework layers_names_all = network.getLayerNames() #obtendo apenas o nome de camadas de saida que precisamos para o algoritmo Yolov3 #com função de retornar o indice das camadas com saidas desconectadas layers_names_output = \ [layers_names_all[i[0] - 1] for i in network.getUnconnectedOutLayers()] # Definir probabilidade minima para eliminar previsões fracas probability_minimum = 0.5 #Definir limite para filtrar caixas delimitadoras fracas #com supressão não máxima threshold = 0.3 #Gera cores aleatórias nas caixas de cada objeto detectados. 
colours = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8") #loop de captura e detecção de objetos with open(f"{module_dir}/results.csv", "w") as arquivo:#criando/lendo o arquivo que vai guardar os testes cabecalho = ["Objeto", "Porcentagem"] escritor_csv = DictWriter(arquivo, fieldnames=cabecalho) escritor_csv.writeheader() while True: if w is None or h is None: #fatiar apenas dois primeiros elementos da tupla h, w = image.shape[:2] #A forma resultante possui um numero de quadros, numero de canais, largura e altura #E.G.: blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False) #Implementando o passe direto com nosso blob somente atraves das camadas de saída #Calculo ao mesmo tempo, tempo necessário para encaminhamento network.setInput(blob) #definindo blob como entrada para a rede output_from_network = network.forward(layers_names_output) #preparando listas para caixas delimitadoras detectadas bounding_boxes = [] confidences = [] class_numbers = [] #passando por todas as camadas de saída após o avanço da alimentação #fase de detecção dos objetos for result in output_from_network: for detected_objects in result: scores = detected_objects[5:] class_current = np.argmax(scores) confidence_current = scores[class_current] #eliminando previsões fracas com probablilidade minima if confidence_current > probability_minimum: box_current = detected_objects[0:4] * np.array([w, h, w, h]) x_center, y_center, box_width, box_height = box_current x_min = int(x_center - (box_width / 2)) y_min = int(y_center - (box_height / 2)) #Adicionando resultados em listas preparadas bounding_boxes.append([x_min, y_min, int(box_width), int(box_height)]) confidences.append(float(confidence_current)) class_numbers.append(class_current) results = cv2.dnn.NMSBoxes(bounding_boxes, confidences, probability_minimum, threshold) #verificando se existe pelo menos um objeto detectado if len(results) > 0: for i in results.flatten(): x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1] box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3] colours_box_current = colours[class_numbers[i]].tolist() image_new = cv2.rectangle(image, (x_min, y_min), (x_min + box_width, y_min + box_height), colours_box_current, 2) #modificando porcentagem para 2 casas decimais. percent = str(confidences[i]) percent_formatted = int(percent[2:6]) percent_formatted = str(percent_formatted/100)+"%" #Preparando texto com rótulo e acuracia para o objeto detectado. text_box_current = "{}: {}".format(labels[int(class_numbers[i])], percent_formatted) # Coloca o texto nos objetos detectados cv2.putText(image, text_box_current, (x_min, y_min - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colours_box_current, 2) escritor_csv.writerow( {"Objeto": text_box_current.split(":")[0], "Porcentagem": text_box_current.split(":")[1]}) lista_all.append(text_box_current.split(":")[0].capitalize() +" - "+ text_box_current.split(":")[1] +" de chance de ser este objeto.") contexto["img_path_new"] = "../../media/new"+filename #unindo caminho para salvar imagem com retangulo e descrição. img_path_new = os.path.join(fs.location, "new"+filename) cv2.imwrite(f"{img_path_new}", image_new)#salvando nova imagem. text_voice = "Esses foram alguns objetos identificados na imagem submetida: " for linha in lista_all: linha = linha.split(" - ") text_voice += f" {linha[0]}," text_voice += ", Todos objetos encontrados tem uma chance de estarem corretos entre 51 a 99%." text_voice += ", Para parar a fala pressione a tecla espaço." 
TTS = gTTS(text=text_voice, lang='pt-br') # Save to mp3 in current dir. TTS.save(os.path.join(fs.location, "audio.mp3")) audio_path = os.path.join(fs.location, "audio.mp3") contexto["audio_path"] = "../../media/audio.mp3" return redirect("../result") elif len(results) <= 0: contexto["img_path_new"] = "../../media/"+filename text_voice = "Não foram identificados objetos nesta imagem, para parar a fala pressione a tecla espaço." TTS = gTTS(text=text_voice, lang='pt-br') # Save to mp3 in current dir. TTS.save(os.path.join(fs.location, "audio.mp3")) audio_path = os.path.join(fs.location, "audio.mp3") contexto["audio_path"] = "../../media/audio.mp3" return redirect("../result") return render(req, 'submitimg.html')#acessa a página pedida
def convert_spacetx_json_to_csv(spacetx_json: str, csv: IOBase):
    spacetx_json_path = Path(spacetx_json).absolute()
    _, name, baseurl = slicedimage.io.resolve_path_or_url(spacetx_json)
    data = slicedimage.io.Reader.parse_doc(name, baseurl)
    assert isinstance(data, slicedimage.Collection)
    csvwriter = DictWriter(csv, [
        "fov", "round", "ch", "zplane",
        "xc_min", "xc_max", "yc_min", "yc_max", "zc_min", "zc_max",
        "path", "sha256",
    ])
    csvwriter.writeheader()
    seen_fov_nums: MutableMapping[int, str] = dict()
    for name, tileset in data.all_tilesets():
        fov_num = int("".join(
            [character for character in name if character.isdigit()]))
        if fov_num in seen_fov_nums:
            raise ValueError(
                f"both {name} and {seen_fov_nums[fov_num]} resolve to the same fov number"
            )
        seen_fov_nums[fov_num] = name
        for tile in tileset.tiles():
            row = {
                'fov': str(fov_num),
                'round': str(tile.indices[Axes.ROUND]),
                'ch': str(tile.indices[Axes.CH]),
                'zplane': str(tile.indices[Axes.ZPLANE]),
                'xc_min': str(tile.coordinates[Coordinates.X][0]),
                'xc_max': str(tile.coordinates[Coordinates.X][1]),
                'yc_min': str(tile.coordinates[Coordinates.Y][0]),
                'yc_max': str(tile.coordinates[Coordinates.Y][1]),
                'zc_min': str(tile.coordinates[Coordinates.Z][0]),
                'zc_max': str(tile.coordinates[Coordinates.Z][1]),
                'sha256': tile.sha256,
            }
            # getting the path is a brittle operation
            for closure_contents in tile._numpy_array_future.__closure__:
                cell_contents = closure_contents.cell_contents
                if isinstance(cell_contents, _FileLikeContextManager):
                    path = Path(cell_contents.path).relative_to(
                        spacetx_json_path.parent)
                    break
            else:
                raise ValueError(f"Could not find the path")
            row['path'] = path
            csvwriter.writerow(row)
def _write_out_file(file_name: str, headers: List[str], content: List[dict]):
    with open(os.path.join(DATA_DIR, file_name), 'w') as out_file:
        writer = DictWriter(out_file, fieldnames=headers)
        writer.writeheader()
        writer.writerows(content)
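# Hedged usage sketch (not part of the original source): headers doubles as the
# DictWriter fieldnames, so every dict in content should use exactly those keys.
# Assumes DATA_DIR exists; file name and rows below are made up for illustration.
_write_out_file('summary.csv',
                headers=['name', 'count'],
                content=[{'name': 'a', 'count': 3}, {'name': 'b', 'count': 7}])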
def run_test_cases(test_cases_filename, running_real_tests): #== Section 1: Setup (Files & Data) ==# # First determine where the renders live parts = [test_cases_filename, 'refernence', 'renders'] references_folder = '_'.join(parts) if running_real_tests: parts = [test_cases_filename, 'renders'] # build some file paths renders_destination = '_'.join(parts) results_csv_filename = path.join(renders_destination, 'results.csv') results_txt_filename = path.join(renders_destination, 'results.txt') cmake_cache_src = path.join('build', 'CMakeCache.txt') cmake_cache_dst = path.join(renders_destination, 'CMakeCache.txt') # Read in the test configurations test_cases = [] with open(test_cases_filename, 'r') as csv_file: reader = DictReader(csv_file) test_cases = [row for row in reader] # Find the samesies for when we do the `idiff` matching_renders = find_matching_renders(test_cases) have_matching = (len(matching_renders) > 0) if have_matching: matching_msg = 'Verifying cases where renders should be the same:' else: matching_msg = 'WARNING: Not able to find any test cases that should produce the same result' # Save renders to a folder that's close to our filename Path(renders_destination).mkdir(exist_ok=True, parents=True) # Copy over the CMake build configuration, this way you know how the software was build for that test run copyfile(cmake_cache_src, cmake_cache_dst) # Create the results CSV file with open(results_csv_filename, 'w') as csv_file: fields = list(FIELDS) fields.append( 'render_time_ns') # Add on the render time as another column # If we're running the real tests, we also need to list PASS/FAIL status if running_real_tests: fields.append('matches_reference') writer = DictWriter(csv_file, fieldnames=fields) writer.writeheader() #== Section 2: Running Tests ==# # Run the renders! total_render_time_ns = 0 num_matches_reference = 0 num_total_cases = len(test_cases) print('Running %i test cases:' % num_total_cases) for case in test_cases: #== Section 2a: Run the Render ==# # Build arguments to run with id_num = case['id_num'] render_filename = '%s.png' % id_num render_filepath = path.join(renders_destination, render_filename) args = [ '--testing-mode', '-o', render_filepath, '--scene', case['scene_id'], '--random-seed', case['random_seed'], '--num-threads', case['num_threads'], '--depth', case['ray_depth'], '--num-samples', case['num_samples'], '--size', case['size'], ] # Do the render print(' Test %s/%s:' % (id_num, num_total_cases), end='', flush=True) output = check_output([PS_RAYTRACING_EXE, *args]).decode('utf-8').strip() parts = output.split(' ') # Verify things were outputted correctly, if not, the quit testing if (len(parts) != 2) or (parts[1] != 'ns'): print('Error in the text output from test %s: %s' % (id_num, output)) print("It's not as expected, quiting the test suite") exit(1) #== Section 2b: Check Render Results ==# # Test against the reference (maybe?) 
        pass_fail_str = None
        if running_real_tests:
            ref_render_filepath = path.join(references_folder, '%s.png' % id_num)
            result = test_images_match(ref_render_filepath, render_filepath)

            if result:
                num_matches_reference += 1
                pass_fail_str = 'PASS'
            else:
                pass_fail_str = 'FAIL'

            # print the result of the pass/fail
            print(' %s' % pass_fail_str, end='', flush=True)

        #== Section 2c: Report/Save Metrics ==#
        # Get the time
        render_time_ns = int(parts[0])
        total_render_time_ns += render_time_ns
        render_time_s = render_time_ns / 1000000000.0
        print(' [%.3f s]' % render_time_s, end='', flush=True)

        # Newline
        print('')

        # Write results to CSV
        with open(results_csv_filename, 'a') as csv_file:
            # Add on the "render time (ns)" column
            case['render_time_ns'] = render_time_ns

            # And maybe the pass/fail
            if pass_fail_str:
                case['matches_reference'] = pass_fail_str

            # A per-row DictWriter: case.keys() must match the header column order written in Section 1
            DictWriter(csv_file, fieldnames=case.keys()).writerow(case)

    #== Section 3: Verification of Matching Renders ==#
    # Verify renders that should be the same
    print('')
    print(matching_msg)

    matching_renders_results_txt = ''
    for pair in matching_renders:
        # Build the arguments for the `idiff` command
        render_a = path.join(renders_destination, '%s.png' % pair[0])
        render_b = path.join(renders_destination, '%s.png' % pair[1])
        result = test_images_match(render_a, render_b)
        result_str = 'PASS' if result else 'FAIL'

        # Format the message to print (and save to report)
        case = ' %s -- %s : %s' % (render_a, render_b, result_str)
        print(case)
        matching_renders_results_txt += '%s\n' % case

    #== Section 4: Metrics Info ==#
    # Metrics
    total_time_str = 'Total render time was %.3f s' % (total_render_time_ns / 1000000000.0)
    print('')
    print(total_time_str)

    # Put some of those metrics in a file
    with open(results_txt_filename, 'w') as results_txt:
        if running_real_tests:
            results_txt.write('%s/%s tests passed\n' % (num_matches_reference, num_total_cases))

        results_txt.write('%s (or %i ns)\n' % (total_time_str, total_render_time_ns))
        results_txt.write('%s\n' % matching_msg)
        if have_matching:
            results_txt.write(matching_renders_results_txt)
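# Illustrative sketch, not part of the original test runner: run_test_cases above
# reopens results.csv in append mode and rebuilds a DictWriter for every case,
# which works but depends on case.keys() matching the header order. A minimal
# alternative (the function name and field names below are placeholder
# assumptions) keeps a single writer open for the whole loop.
from csv import DictWriter


def write_results_once(results_csv_filename, fields, cases):
    """Write the header and all rows with one file handle and one DictWriter."""
    with open(results_csv_filename, 'w', newline='') as csv_file:
        writer = DictWriter(csv_file, fieldnames=fields)
        writer.writeheader()
        for case in cases:
            writer.writerow(case)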
with open("results/variance.csv", "w") as f: header = [ "name", "octtime mean", "octtime var", "dfstime mean", "dfstime var", "stdtime mean", "stdtime var", "biptime mean", "biptime var", "naivetime mean", "naivetime var", "apxtime mean", "apxtime var", "greedytime mean", "greedytime var", "octfirsttime mean", "octfirsttime var", "bipfirsttime mean", "bipfirsttime var", "rectime mean", "rectime var", "recocttime mean", "recocttime var", "recbiptime mean", "recbiptime var", "dfssize mean", "dfssize var", "stdsize mean", "stdsize var", "octsize mean", "octsize var", "n", "bfssize mean", "bfstime mean", "bfstime var" ] results = DictWriter(f, header) results.writeheader() for filename in os.listdir(filepath): if not filename.endswith(".s6"): continue res = {} graphname = filename.split(".s6")[0] print(graphname) res["name"] = graphname graph = read_sparse6("{}{}".format(filepath, filename)) print("n: {}".format(len(graph))) res["n"] = len(graph)
print(range_instance.generator())
range_instance.range_list()

# %% CSV
from csv import DictWriter, DictReader
from pathlib import Path

csv_file_path = Path("./test.csv")
test_rows = [{"a": 1, "b": 2}, {"a": 40, "b": 34}]

with open("test.csv", "w") as test_file:
    test_writer = DictWriter(test_file, fieldnames=["a", "b"])
    test_writer.writeheader()
    for row in test_rows:
        test_writer.writerow(row)


# %%
def test():
    row = list()


def test_two():
    row = None
    row.append(1)  # row is None here, so this raises AttributeError when called


test_rows = []
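# %% Illustrative follow-up cell (not in the original scratch file): read back
# the test.csv written above with DictReader, which the import above already pulls in.
from csv import DictReader

with open("test.csv", "r", newline="") as test_file:
    test_reader = DictReader(test_file)
    for read_row in test_reader:
        # DictReader yields every value as a string, so cast before arithmetic.
        print(int(read_row["a"]) + int(read_row["b"]))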
def main(): single_counts = Counter() double_counts = Counter() all_labels = [] with open('processed/genres.json', 'r') as genres_file: genres = load(genres_file) id_to_genre = {} for genre, gid in genres.items(): id_to_genre[gid] = genre with open('processed/labels.txt', 'r') as labels_file: for line in labels_file: all_labels.append( list(map(id_to_genre.get, line.strip().split(',')))) for labels in all_labels: single_counts.update(labels) double_counts.update(map(tuple, map(sorted, combinations(labels, 2)))) print(single_counts) print() print(double_counts) workbook = Workbook('distribution.xlsx') worksheet = workbook.add_worksheet() bold = workbook.add_format({ 'bold': True, 'align': 'center', 'valign': 'vcenter' }) bold_header = workbook.add_format({ 'bold': True, 'align': 'center', 'valign': 'vcenter', 'bg_color': '#a0a0a0' }) bold_header_sideways = workbook.add_format({ 'bold': True, 'rotation': 90, 'align': 'center', 'valign': 'vcenter', 'bg_color': '#a0a0a0' }) row = 0 for genre2 in sorted(single_counts.keys()): worksheet.write(row, 0, genre2, bold_header) col = 1 for genre1 in filter(lambda g1: g1 < genre2, sorted(single_counts.keys())): co_occurrances = double_counts[(genre1, genre2)] ratio = (256 * co_occurrances) // ( single_counts[genre1] + single_counts[genre2] - co_occurrances) f = workbook.add_format({ 'bg_color': "#FF{0:02x}{0:02x}".format(255 - (ratio * 3)), 'align': 'center', 'valign': 'vcenter' }) worksheet.write(row, col, co_occurrances, f) col += 1 worksheet.write(row, row, single_counts[genre2], bold) row += 1 for col, genre1 in enumerate([''] + sorted(single_counts.keys())): worksheet.write(len(single_counts.keys()), col, genre1, bold_header_sideways) workbook.close() with open('distribution.csv', 'w') as dist_file: writer = DictWriter(dist_file, fieldnames=['genre'] + sorted(single_counts.keys())) writer.writeheader() for genre2 in sorted(single_counts.keys()): row = {'genre': genre2, genre2: single_counts[genre2]} for genre1 in filter(lambda g1: g1 < genre2, single_counts.keys()): row[genre1] = double_counts[(genre1, genre2)] writer.writerow(row)
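# Illustrative sketch, not part of the original script: the distribution.csv
# block above writes rows that omit most genre columns; that relies on
# DictWriter's restval argument (an empty string by default) filling any field
# that appears in fieldnames but is missing from the row dict. The column
# names and values below are made-up placeholders.
from csv import DictWriter
import sys

sketch_writer = DictWriter(sys.stdout, fieldnames=['genre', 'rock', 'jazz'],
                           restval=0)
sketch_writer.writeheader()
# 'jazz' is absent from this row, so restval (0 here) is written in its place.
sketch_writer.writerow({'genre': 'rock', 'rock': 12})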
def run_gavrptw(instance_name, unit_cost, init_cost, wait_cost, delay_cost, ind_size, pop_size, \ cx_pb, mut_pb, n_gen, export_csv=False, customize_data=False): '''gavrptw.core.run_gavrptw(instance_name, unit_cost, init_cost, wait_cost, delay_cost, ind_size, pop_size, cx_pb, mut_pb, n_gen, export_csv=False, customize_data=False)''' if customize_data: json_data_dir = os.path.join(BASE_DIR, 'data', 'json_customize') else: json_data_dir = os.path.join(BASE_DIR, 'data', 'json') json_file = os.path.join(json_data_dir, '{}.json'.format(instance_name)) instance = load_instance(json_file=json_file) if instance is None: return creator.create('FitnessMax', base.Fitness, weights=(1.0, )) creator.create('Individual', list, fitness=creator.FitnessMax) toolbox = base.Toolbox() # Attribute generator toolbox.register('indexes', random.sample, range(1, ind_size + 1), ind_size) # Structure initializers toolbox.register('individual', tools.initIterate, creator.Individual, toolbox.indexes) toolbox.register('population', tools.initRepeat, list, toolbox.individual) # Operator registering toolbox.register('evaluate', eval_vrptw, instance=instance, unit_cost=unit_cost, init_cost=init_cost, wait_cost=wait_cost, delay_cost=delay_cost) toolbox.register('select', tools.selRoulette) toolbox.register('mate', cx_partialy_matched) toolbox.register('mutate', mut_inverse_indexes) pop = toolbox.population(n=pop_size) # Results holders for exporting results to CSV file csv_data = [] print('Start of evolution') # Evaluate the entire population fitnesses = list(map(toolbox.evaluate, pop)) for ind, fit in zip(pop, fitnesses): ind.fitness.values = fit print(' Evaluated {} individuals'.format(len(pop))) # Begin the evolution for gen in range(n_gen): print('-- Generation {} --'.format(gen)) # Select the next generation individuals offspring = toolbox.select(pop, len(pop)) # Clone the selected individuals offspring = list(map(toolbox.clone, offspring)) # Apply crossover and mutation on the offspring for child1, child2 in zip(offspring[::2], offspring[1::2]): if random.random() < cx_pb: toolbox.mate(child1, child2) del child1.fitness.values del child2.fitness.values for mutant in offspring: if random.random() < mut_pb: toolbox.mutate(mutant) del mutant.fitness.values # Evaluate the individuals with an invalid fitness invalid_ind = [ind for ind in offspring if not ind.fitness.valid] fitnesses = map(toolbox.evaluate, invalid_ind) for ind, fit in zip(invalid_ind, fitnesses): ind.fitness.values = fit print(' Evaluated {} individuals'.format(len(invalid_ind))) # The population is entirely replaced by the offspring pop[:] = offspring # Gather all the fitnesses in one list and print the stats fits = [ind.fitness.values[0] for ind in pop] length = len(pop) mean = sum(fits) / length sum2 = sum(x * x for x in fits) std = abs(sum2 / length - mean**2)**0.5 print(' Min {}'.format(min(fits))) print(' Max {}'.format(max(fits))) print(' Avg {}'.format(mean)) print(' Std {}'.format(std)) # Write data to holders for exporting results to CSV file if export_csv: csv_row = { 'generation': gen, 'evaluated_individuals': len(invalid_ind), 'min_fitness': min(fits), 'max_fitness': max(fits), 'avg_fitness': mean, 'std_fitness': std, } csv_data.append(csv_row) print('-- End of (successful) evolution --') best_ind = tools.selBest(pop, 1)[0] print('Best individual: {}'.format(best_ind)) print('Fitness: {}'.format(best_ind.fitness.values[0])) print_route(ind2route(best_ind, instance)) print('Total cost: {}'.format(1 / best_ind.fitness.values[0])) if export_csv: 
csv_file_name = '{}_uC{}_iC{}_wC{}_dC{}_iS{}_pS{}_cP{}_mP{}_nG{}.csv'.format( instance_name, unit_cost, init_cost, wait_cost, delay_cost, ind_size, pop_size, cx_pb, mut_pb, n_gen) csv_file = os.path.join(BASE_DIR, 'results', csv_file_name) print('Write to file: {}'.format(csv_file)) make_dirs_for_file(path=csv_file) if not exist(path=csv_file, overwrite=True): with io.open(csv_file, 'wt', newline='') as file_object: fieldnames = [ 'generation', 'evaluated_individuals', 'min_fitness', 'max_fitness', 'avg_fitness', 'std_fitness', ] writer = DictWriter(file_object, fieldnames=fieldnames, dialect='excel') writer.writeheader() for csv_row in csv_data: writer.writerow(csv_row)
def register_user(username, password, userids):
    """Takes input, bcrypts it, and writes it to a file.

    Keyword arguments:
    username - The unique identifier for the user.
    password - Self-explanatory.
    userids - The unique identifiers that the user will have access to in
    order to retrieve encrypted data.

    Output:
    The function writes the username, hashed password, userids, a TOTP key
    and a generated API key to u_file as specified in the configuration above.
    """
    if exists(u_file):
        try:
            user_file = open(u_file, 'r', encoding='ascii')
        except PermissionError:
            print('Unable to open the file. Check permissions.')
            exit(1)
        user_check = DictReader(user_file)
        for line in user_check:
            if username == line['username']:
                print('User already exists. Exiting.')
                exit(1)
        user_file.close()
    else:
        pass
    if validate_un(username):
        # Setting file info.
        f_headers = [
            'username', 'password', 'userids', 'apikey', 'totp', 'fl_tstamp',
            'fl_count'
        ]
        if exists(u_file):
            pwd_file = open(u_file, 'a', newline='', encoding='ascii')
            writer = DictWriter(pwd_file, fieldnames=f_headers)
        else:
            pwd_file = open(u_file, 'w', newline='', encoding='ascii')
            writer = DictWriter(pwd_file, fieldnames=f_headers)
            writer.writeheader()
        # Converting input as needed.
        if validate_pw(password):
            pwd = password.encode(encoding='ascii')
            h_pwd = hashpw(b64encode(sha512(pwd).digest()), gensalt())
            apikey = sha256(b64encode(urandom(32))).hexdigest()
            totp = b32encode(urandom(16)).decode('ascii').strip('=')
        else:
            print('Password does not meet password requirements')
            exit(1)
        # Writing input to file.
        if ',' in userids:
            writer.writerow({
                'username': username,
                'password': h_pwd.decode(encoding='ascii'),
                'userids': userids.split(','),
                'apikey': apikey,
                'totp': totp,
                'fl_tstamp': 'None',
                'fl_count': '0'
            })
        else:
            writer.writerow({
                'username': username,
                'password': h_pwd.decode(encoding='ascii'),
                'userids': [userids],
                'apikey': apikey,
                'totp': totp,
                'fl_tstamp': 'None',
                'fl_count': '0'
            })
        pwd_file.close()
        return {'apikey': apikey, 'totp': totp}
    else:
        print('User name is not in a valid format.')
        exit(1)
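# Illustrative sketch, not a helper from the original module: register_user
# above writes the CSV header only when u_file is being created and appends
# otherwise. The same pattern isolated into a small hypothetical function
# (name and arguments are assumptions):
from csv import DictWriter
from os.path import exists


def append_user_row(path, fieldnames, row):
    """Append one row, emitting the header only if the file is being created."""
    is_new_file = not exists(path)
    with open(path, 'a', newline='', encoding='ascii') as handle:
        writer = DictWriter(handle, fieldnames=fieldnames)
        if is_new_file:
            writer.writeheader()
        writer.writerow(row)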
def convertFacts(self): finalFields = mappings.FACT_EXPORT_FIELDS shortMap, issueMap, orgMap = {}, {}, {} with open(self.expFolder + 'org_names.csv', 'r') as namesIn: reader = DictReader(namesIn) [ orgMap.update({a['Short Name']: a['Full Name']}) for a in list(reader) ] with open(self.expFolder + 'people_names.csv', 'r') as namesIn: reader = DictReader(namesIn) [ shortMap.update({a['Short Name']: a['Full Name']}) for a in list(reader) ] with open(self.expFolder + 'issue_names.csv', 'r') as issuesIn: reader = DictReader(issuesIn) [ issueMap.update({a['Short Name']: a['Full Name']}) for a in list(reader) ] with open(self.facts[0], 'r') as factsIn: reader = DictReader(factsIn) contents = [dict(a) for a in list(reader)] with open(self.expFolder + 'doc_shorts.csv', 'r') as docShortsIn: reader = DictReader(docShortsIn) docShorts = [] [docShorts.append(a['Short Name']) for a in list(reader)] for row in contents: for field in mappings.FACT_DISCARD_FIELDS: row.pop(field, '') allFields = list(contents[0].keys()) for field in allFields: if not any(entry[field] for entry in contents): for row in contents: row.pop(field) for field in list(contents[0].keys()): if field not in mappings.FACT_BUILT_INS: fieldOut = open(self.impFolder + 'fact_custom_props.txt', 'a') fieldOut.write("{0}\n".format(field)) fieldOut.close() for row in contents: charList = [] for short, full in orgMap.items(): if short in row['Fact Text']: row['Fact Text'] = row['Fact Text'].replace(short, full) charList.append(full) for short, full in shortMap.items(): if short in row['Fact Text']: row['Fact Text'] = row['Fact Text'].replace(short, full) charList.append(full) for short, full in issueMap.items(): row['Linked Issues'] = row['Linked Issues'].replace( short, full) row['Issues'] = row.pop('Linked Issues').replace(',', ';') row['Characters'] = '; '.join(charList) row['Description'] = row.pop('Fact Text') row['Title'] = ' '.join(row['Description'].split()[:8]) row['Undisputed'] = 'No' if row['Date & Time'] == 'TBD': row['Start Date'] = '' row['End Date'] = '' else: row['Start Date'] = fixDate(row['Date & Time']) row.pop('Date & Time') row['Author'] = '' row['Annotation Sources'] = '' sourceList = [] for doc in docShorts: if doc in row['Source(s)']: sourceList.append(doc) row['Source(s)'] = row['Source(s)'].replace(doc, '') row['Source(s)'] = re.sub(r'\[.*\]', '', row['Source(s)']) row['Full-Text Sources'] = '; '.join(sourceList) row['Full-Text Sources'] += '; {0}'.format( row['Source(s)'].strip()) row.pop('Source(s)') finalFields = list(contents[0].keys()) factWriter = DictWriter(open(self.facts[1], 'w'), lineterminator='\n', fieldnames=finalFields) factWriter.writeheader() for row in contents: factWriter.writerow(row)
def convertDocs(self): shortMap, issueMap, orgMap = {}, {}, {} issueLists = [] issuePrefix = "DLI_" with open(self.expFolder + 'org_names.csv', 'r') as namesIn: reader = DictReader(namesIn) [ orgMap.update({a['Short Name']: a['Full Name']}) for a in list(reader) ] with open(self.expFolder + 'people_names.csv', 'r') as namesIn: reader = DictReader(namesIn) [ shortMap.update({a['Short Name']: a['Full Name']}) for a in list(reader) ] with open(self.expFolder + 'issue_names.csv', 'r') as issuesIn: reader = DictReader(issuesIn) [ issueMap.update({a['Short Name']: a['Full Name']}) for a in list(reader) ] with open(self.documents[0]) as rawIn: reader = DictReader(rawIn) contents = [dict(a) for a in list(reader)] for entry in contents: for field in mappings.DOC_DISCARD_FIELDS: entry.pop(field) issueLists.append( [a.strip() for a in entry['Linked Issues'].split(',')]) issueMax = len(max(issueLists, key=len)) allFields = list(contents[0].keys()) for field in allFields: if not any(entry[field] for entry in contents): for row in contents: row.pop(field) for entry in contents: if entry['Date'] in ['TBD', '']: entry['Date'] = '' else: entry['Date'] = fixDate(entry['Date']) for suffix in mappings.DOC_SUFFIXES: entry['Linked File'] = entry['Linked File'].replace(suffix, '') for short, full in shortMap.items(): for key, value in entry.items(): if key == 'Linked File': continue else: entry[key] = value.replace(short, full) for short, full in orgMap.items(): for key, value in entry.items(): if key == 'Linked File': continue else: entry[key] = value.replace(short, full) for field in list(contents[0].keys()): if field not in mappings.DOC_BUILT_INS: fieldOut = open(self.impFolder + 'doc_custom_props.txt', 'a') fieldOut.write("{0}\n".format(field)) fieldOut.close() finalFields = list(contents[0].keys()) finalFields.remove('Linked Issues') for i in range(0, issueMax): finalFields.append("{0}{1}".format(issuePrefix, i)) for row in contents: if not row['Linked Issues'] == '': for index, issue in enumerate(row['Linked Issues'].split(',')): try: row.update({ '{0}{1}'.format(issuePrefix, index): issueMap[issue.strip()] }) except KeyError: # in case there are dupe issues(replaced above) and org names row.update({ '{0}{1}'.format(issuePrefix, index): issue.strip() }) row.pop('Linked Issues') for key, value in row.items(): if key == 'Linked File': continue else: row[key] = value.replace(',', ';') writer = DictWriter(open(self.documents[1], 'w'), lineterminator='\n', fieldnames=finalFields) docShortWriter = DictWriter(open(self.expFolder + 'doc_shorts.csv', 'w'), lineterminator='\n', fieldnames=['Short Name']) writer.writeheader() docShortWriter.writeheader() for row in contents: writer.writerow(row) docShortWriter.writerow({'Short Name': row['Short Name']})
def main(corrected_csv, cluster_info, output_prefix, fasta_file=None, gff_file=None, faa_file=None): # read corrected CSV reader = DictReader(open(corrected_csv), delimiter='\t') for k in CORRECTED_CSV_FILELDS: if k not in reader.fieldnames: print("The following fields must exist in {0}!\n{1}".format( corrected_csv, "\n".join(CORRECTED_CSV_FILELDS))) sys.exit(-1) per_unique = {} # tag -> record per_unique_count = Counter() # tag -> number of duplicates per_pbid = defaultdict(lambda: { 'gene': None, 'transcript': None, 'clusters': [] }) # pbid --> list of clusters it is in for r in reader: tag = "{bc}-{umi}-{gene}".format(bc=r['BC_ed'], umi=r['UMI_ed'], gene=r['gene']) per_unique[tag] = r per_unique_count[tag] += 1 # now link barcode to cell type, also PCR dup counts for tag in per_unique: c = cluster_info[per_unique[tag]['BC_ed']] rec = per_unique[tag] rec['cluster'] = c rec['num_dups'] = per_unique_count[tag] pbid = rec['pbid'] if pbid in per_pbid: per_pbid[pbid]['clusters'].add(c) else: per_pbid[pbid] = { 'gene': rec['gene'], 'transcript': rec['transcript'], 'clusters': set([c]) } # write out de-dup CSV file with open(output_prefix + '.csv', 'w') as f: writer = DictWriter(f, CORRECTED_CSV_FILELDS + ['cluster', 'num_dups'], delimiter='\t', extrasaction='ignore') writer.writeheader() keys = per_unique.keys() for k in sorted(keys): writer.writerow(per_unique[k]) if fasta_file is not None: f_d = {} # cluster --> file handle # writer pbid master file with open(output_prefix + '.fasta', 'w') as f: for r in SeqIO.parse(open(fasta_file), 'fasta'): if r.id in per_pbid: newid = "{pbid}|{gene}|{transcript}|{clusters}".format(\ pbid=r.id, gene=per_pbid[r.id]['gene'], transcript=per_pbid[r.id]['transcript'], clusters=";".join(per_pbid[r.id]['clusters'])) f.write(">{0}\n{1}\n".format(newid, r.seq)) for c in per_pbid[r.id]['clusters']: if c not in f_d: f_d[c] = open( "{o}.{c}.fasta".format(o=output_prefix, c=c), 'w') f_d[c].write(">{0}\n{1}\n".format(newid, r.seq)) if faa_file is not None: f_d = {} # cluster --> file handle # writer pbid master file with open(output_prefix + '.faa', 'w') as f: for r in SeqIO.parse(open(faa_file), 'fasta'): if r.id in per_pbid: newid = "{pbid}|{gene}|{transcript}|{clusters}".format(\ pbid=r.id, gene=per_pbid[r.id]['gene'], transcript=per_pbid[r.id]['transcript'], clusters=";".join(per_pbid[r.id]['clusters'])) f.write(">{0}\n{1}\n".format(newid, r.seq)) for c in per_pbid[r.id]['clusters']: if c not in f_d: f_d[c] = open( "{o}.{c}.faa".format(o=output_prefix, c=c), 'w') f_d[c].write(">{0}\n{1}\n".format(newid, r.seq)) for handle in f_d.values(): handle.close() if gff_file is not None: f_d = {} # cluster --> file handle # writer pbid master file with open(output_prefix + '.gff', 'w') as f: for r in collapseGFFReader(gff_file): if r.seqid in per_pbid: newid = "{pbid}|{gene}|{transcript}|{clusters}".format(\ pbid=r.seqid, gene=per_pbid[r.seqid]['gene'], transcript=per_pbid[r.seqid]['transcript'], clusters=";".join(per_pbid[r.seqid]['clusters'])) write_collapseGFF_format(f, r) for c in per_pbid[r.seqid]['clusters']: if c not in f_d: f_d[c] = open( "{o}.{c}.gff".format(o=output_prefix, c=c), 'w') write_collapseGFF_format(f_d[c], r) for handle in f_d.values(): handle.close()
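# Illustrative sketch, not part of the original pipeline: the de-dup CSV above
# is written with extrasaction='ignore', so any key in a record that is not in
# the listed fieldnames is silently dropped instead of raising ValueError. The
# field names and row values below are made-up placeholders.
from csv import DictWriter
import sys

dedup_writer = DictWriter(sys.stdout, fieldnames=['pbid', 'gene'],
                          delimiter='\t', extrasaction='ignore')
dedup_writer.writeheader()
# 'cluster' is not in fieldnames, so it is omitted from the written row.
dedup_writer.writerow({'pbid': 'PB.1.1', 'gene': 'geneA', 'cluster': 'c3'})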
def write_file(header, rows, file_path): with open(file_path, 'w') as archivo: writer = DictWriter(archivo, fieldnames=header) writer.writeheader() writer.writerows(rows)
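# Illustrative usage sketch for write_file above; the header, rows, and output
# path are made-up placeholders, not values from the original project.
if __name__ == '__main__':
    write_file(header=['name', 'score'],
               rows=[{'name': 'ana', 'score': 10}, {'name': 'luis', 'score': 7}],
               file_path='scores.csv')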
def main( input_prefix: str = typer.Argument(...), output_prefix: str = typer.Argument(...), cpus: int = typer.Option(10, "--cpus", "-n", help="Number of CPUS"), version: bool = typer.Option( None, "--version", callback=version_callback, is_eager=True, help="Prints the version of the SQANTI3 package.", ), ) -> None: info = {} for r in SeqIO.parse(open(f"{input_prefix}.lima.clips"), "fasta"): zmw = r.id[: r.id.rfind("/")] e = int(r.id.split("/")[2].split("_")[1]) if e < 100: info[zmw] = "F5" if r.description.split("bc:")[-1] == "0" else "R3" logger.info("Finished reading lima clips file.") num_records = len(info) chunk_size = (num_records // cpus) + (num_records % cpus) offset_start = 0 input_bam = f"{input_prefix}.bam" pools = [] onames = [] while offset_start <= num_records: oname = f"{output_prefix}.{str(offset_start)}" p = Process( target=deconcat_worker, args=(input_bam, offset_start, offset_start + chunk_size, oname, info), ) p.start() logger.info( f"Launching deconcat worker for records {offset_start}-{offset_start + chunk_size}..." ) offset_start += chunk_size pools.append(p) onames.append(oname) for p in pools: p.join() logger.info("All deconcat workers done. Collecting results.") with open(f"{output_prefix}.csv", "w") as f_csv: writer = DictWriter(f_csv, CSV_FIELDS, delimiter=",") writer.writeheader() bams = [] for oname in onames: bams.append(f"{oname}.bam") for r in DictReader(open(f"{oname}.csv"), delimiter=","): writer.writerow(r) logger.info("Merging bam files...") reader = pysam.AlignmentFile(bams[0], "rb", check_sq=False) with pysam.AlignmentFile(f"{output_prefix}.bam", "wb", header=reader.header) as f: for bam in bams: for r in pysam.AlignmentFile(bam, "rb", check_sq=False): x = pysam.AlignedSegment.from_dict(r.to_dict(), r.header) f.write(x) # pysam.merge(output_prefix+'.bam', *bams) for oname in onames: Path(f"{oname}.bam").unlink() Path(f"{oname}.csv").unlink() logger.info(f"Output written to: {output_prefix}.bam, {output_prefix}.csv")
def process_ser_log_parser(port): node_id = 0 role = '' with Serial(port, BAUDRATE) as ser: # discover node_id while True: _, module, content = LogParser.parse_log_line( ser.readline().decode()) if module is LogModule.MAIN: node_id = LogParser.parse_node_id(content) if node_id: break # discover role while True: _, module, _ = LogParser.parse_log_line(ser.readline().decode()) if module is LogModule.MASTER or module is LogModule.SLAVE: role = module print('node_id: {}, role: {}'.format(node_id, role.value)) break # collect reports csv_file = '{}.csv'.format('master' if role is LogModule.MASTER else 'slave-{}'.format(node_id)) b64_file = '{}.b64'.format('master' if role is LogModule.MASTER else 'slave-{}'.format(node_id)) with open('{}-{}'.format('rss', csv_file), 'w') as rss_csv, \ open('{}-{}'.format('acc', csv_file), 'w') as acc_csv, \ open('{}-{}'.format('rss', b64_file), 'w') as rss_b64, \ open('{}-{}'.format('acc', b64_file), 'w') as acc_b64: rss_writer = DictWriter(rss_csv, fieldnames=[ 'source_id', 'seq_num', 'time_stamp', 'rss_local', 'rss_remote' ]) acc_writer = DictWriter( acc_csv, fieldnames=[a.name.lower() for a in AccAxis]) rss_writer.writeheader() acc_writer.writeheader() # write field names while True: level, module, content = LogParser.parse_log_line( ser.readline().decode()) if level is LogLevel.REP: report_type, report_data = LogParser.parse_report(content) assert (isinstance(report_type, ReportType)) if report_type is ReportType.RSS: rss_b64.write('{}\n'.format(report_data)) rss_writer.writerow( LogParser.parse_rss(b64_decode(report_data))) else: acc_b64.write('{}\n'.format(report_data)) acc_writer.writerows( LogParser.parse_acc_xyz(b64_decode(report_data))) elif (module is LogModule.MASTER or module is LogModule.SLAVE ) and content == 'Process started': # restart detected, flush and terminate rss_csv.flush() acc_csv.flush() rss_b64.flush() acc_b64.flush() break
def csv_results(self, csv_file, histogram_size=None):
    histogram_size = histogram_size or 10
    bins = range(histogram_size)
    bins.insert(0, "idea")
    bins.extend(["avg", "std_dev"])
    dw = DictWriter(csv_file, bins, dialect='excel', delimiter=';')
    dw.writeheader()
    by_idea = self._gather_results()
    values = {
        votable_id: self.results_for(voting_results, histogram_size)
        for (votable_id, voting_results) in by_idea.iteritems()
    }
    idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
        Idea.id.in_(by_idea.keys())))
    idea_names = {
        id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
    ordered_idea_ids = Idea.visit_idea_ids_depth_first(
        AppendingVisitor(), self.get_discussion_id())
    ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
    for idea_id in ordered_idea_ids:
        base = values[idea_id]
        r = dict(enumerate(base['histogram']))
        r['idea'] = idea_names[idea_id]
        r['avg'] = base['avg']
        r['std_dev'] = base['std_dev']
        dw.writerow(r)