def dump_to_csv(filepath, headings, data):
    # newline='' is recommended by the csv module docs to avoid blank lines on Windows.
    with open(filepath, "w", newline="") as f:
        fieldnames = [key for key in headings]
        writer = Writer(f)
        writer.writerow(fieldnames)
        for row in data:
            writer.writerow([row[key] for key in headings])

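# A minimal usage sketch for dump_to_csv above. It assumes Writer aliases csv.writer
# and that every row in `data` is a mapping keyed by the entries of `headings`.
# The file name and the example records are hypothetical.
example_headings = ["name", "age"]
example_data = [{"name": "Ada", "age": 36}, {"name": "Grace", "age": 45}]
dump_to_csv("people.csv", example_headings, example_data)
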
def output_for_curation(trait: Trait, curation_writer: csv.writer):
    """
    Write any non-finished Zooma or OxO mappings of a trait to a file for manual curation.
    Also outputs traits without any ontology mappings.

    :param trait: A Trait with no finished ontology mappings in finished_mapping_set
    :param curation_writer: A csv.writer to write non-finished ontology mappings for manual curation
    """
    # Traits which are associated with NT expansion variants are of highest importance, and they need to be curated
    # even if the number of records they are associated with is low.
    trait_frequency = 'NT expansion' if trait.associated_with_nt_expansion else trait.frequency
    output_row = [trait.name, trait_frequency]

    zooma_mapping_list = get_mappings_for_curation(trait.zooma_result_list)
    for zooma_mapping in zooma_mapping_list:
        cell = [zooma_mapping.uri, zooma_mapping.ontology_label, str(zooma_mapping.confidence),
                zooma_mapping.source, 'EFO_CURRENT' if zooma_mapping.in_efo else 'NOT_CONTAINED']
        output_row.append("|".join(cell))

    oxo_mapping_list = get_mappings_for_curation(trait.oxo_result_list)
    for oxo_mapping in oxo_mapping_list:
        cell = [str(oxo_mapping.uri), oxo_mapping.ontology_label, str(oxo_mapping.distance),
                oxo_mapping.query_id, 'EFO_CURRENT' if oxo_mapping.in_efo else 'NOT_CONTAINED']
        output_row.append("|".join(cell))

    curation_writer.writerow(output_row)

def output_for_curation(trait: Trait, curation_writer: csv.writer):
    """
    Write any non-finished Zooma or OxO mappings of a trait to a file for manual curation.
    Also outputs traits without any ontology mappings.

    :param trait: A Trait with no finished ontology mappings in finished_mapping_set
    :param curation_writer: A csv.writer to write non-finished ontology mappings for manual curation
    """
    output_row = [trait.name, trait.frequency]

    zooma_mapping_list = get_mappings_for_curation(trait.zooma_result_list)
    for zooma_mapping in zooma_mapping_list:
        cell = [zooma_mapping.uri, zooma_mapping.ontology_label, str(zooma_mapping.confidence),
                zooma_mapping.source, 'EFO_CURRENT' if zooma_mapping.in_efo else 'NOT_CONTAINED']
        output_row.append("|".join(cell))

    oxo_mapping_list = get_mappings_for_curation(trait.oxo_result_list)
    for oxo_mapping in oxo_mapping_list:
        cell = [str(oxo_mapping.uri), oxo_mapping.ontology_label, str(oxo_mapping.distance),
                oxo_mapping.query_id, 'EFO_CURRENT' if oxo_mapping.in_efo else 'NOT_CONTAINED']
        output_row.append("|".join(cell))

    curation_writer.writerow(output_row)

def MaybeTranscodeMp3(path: str, out_csv: csv.writer, max_bitrate: int = 192,
                      lame_option: int = 3) -> bool:
    """
    See https://trac.ffmpeg.org/wiki/Encode/MP3 for a table of variable bitrate values.
    """
    output = subprocess.check_output(['file', path], universal_newlines=True)
    match = re.search(r' (\d+) kbps', output)
    bit_rate = int(match.group(1))
    if bit_rate > max_bitrate:
        size_before = os.path.getsize(path)
        system.ProcessFileAndReplace(
            path,
            lambda inpath, outpath: TranscodeMp3(inpath, outpath, lame_option),
            tempfile_prefix='phd_system_machines_florence_transcode_musiclib_',
            tempfile_suffix='.mp3')
        size_after = os.path.getsize(path)
        # logging uses lazy %-formatting, so pass a plain format string, not an f-string.
        logging.info('%s changed from %s to %s (%.1f%% reduction)',
                     os.path.basename(path),
                     humanize.naturalsize(size_before),
                     humanize.naturalsize(size_after),
                     (1 - (size_after / size_before)) * 100)
        out_csv.writerow(
            [datetime.datetime.now(), path, bit_rate, size_before, size_after])
        return True
    else:
        logging.debug('Ignoring %s', path)
        return False

def parse_results(results: dict, writer: csv.writer) -> None:
    """
    Given a dict of results from the news-api client, parse each article and use
    the writer to write a row to the csv.
    """
    for article in results:
        try:
            source = article['source']
        except KeyError:
            print("KeyError: 'source' not in article")
            source_id, source_name = '', ''
        else:
            source_id = source.get('id', '')
            source_name = source.get('name', '')
        author = article.get('author', '')
        title = article.get('title', '')
        content = article.get('content', '')
        url = article.get('url', '')
        published_at = article.get('publishedAt', '')
        writer.writerow([
            source_id, source_name, author, title, content, url, published_at
        ])
        COUNTER[0] += 1

def output_trait_mapping(trait: Trait, mapping_writer: csv.writer):
    """
    Write any finished ontology mappings for a trait to a csv file writer.

    :param trait: A trait with finished ontology mappings in finished_mapping_set
    :param mapping_writer: A csv.writer to write the finished mappings
    """
    for ontology_entry in trait.finished_mapping_set:
        mapping_writer.writerow([trait.name, ontology_entry.uri, ontology_entry.label])

def _write_table_headers(self, writer: csv.writer):
    """
    Writes the headers of the data table to the CSV file.

    :param writer: The ``csv.writer`` object
    """
    elems = self.soup.select(".table-scroll thead tr th")
    headers = [e.getText() for e in elems]
    writer.writerow(headers)

def write_events_to_csv(output_file, calendar, property_names):
    writer = CSVWriter(output_file)
    writer.writerow(property_names)
    for event in get_events(calendar):
        props = []
        for property_name in property_names:
            property_value = get_property_value(event, property_name)
            props.append(property_value)
        writer.writerow(props)

def write_header(results: dict, csvwr: csv.writer, values: int):
    row = [""]
    for lvl, algs in results.items():
        for alg in algs.keys():
            # Each algorithm occupies `values` columns: its label followed by padding cells.
            header = [lvl + "/" + alg]
            for _ in range(values - 1):
                header.append("")
            row.extend(header)
    csvwr.writerow(row)

def _write_table_rows(self, writer: csv.writer):
    """
    Iterates through the rows of the current data table. The data in each row is
    written to the CSV file.

    :param writer: The ``csv.writer`` object
    """
    row_elems = self.soup.select(".table-scroll tbody tr")
    for row in row_elems:
        elems = row.select("td")
        items = [e.getText() for e in elems]
        writer.writerow(items)

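# Illustrative wiring for the two table-scraping helpers above. They are written as
# methods (first parameter `self`), so this sketch builds a tiny stand-in object whose
# `soup` attribute is a BeautifulSoup document and calls them explicitly. The HTML
# snippet and the output path are hypothetical.
import csv
from types import SimpleNamespace
from bs4 import BeautifulSoup

_html = """
<div class="table-scroll">
  <table>
    <thead><tr><th>City</th><th>Population</th></tr></thead>
    <tbody><tr><td>Oslo</td><td>709000</td></tr></tbody>
  </table>
</div>
"""
_page = SimpleNamespace(soup=BeautifulSoup(_html, "html.parser"))

with open("table.csv", "w", newline="") as _f:
    _writer = csv.writer(_f)
    _write_table_headers(_page, _writer)
    _write_table_rows(_page, _writer)
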
def _iter_xsv_chunks(chunks, prefix="", delimiter=",", quoting=2, lineterminator=None):
    """Iterate chunks in `delimiter`-separated format"""
    fmtparams = dict(delimiter=delimiter, quoting=quoting)
    if lineterminator:
        fmtparams["lineterminator"] = lineterminator
    with StringIO() as handle:
        writer = CSVWriter(handle, **fmtparams)
        for chunk in chunks:
            writer.writerow(chunk)
            handle.seek(0)
            yield prefix + handle.getvalue()
            handle.truncate()

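# A small driver sketch for _iter_xsv_chunks above, assuming CSVWriter aliases
# csv.writer and StringIO comes from io. The chunk values are hypothetical; with the
# default quoting=2 (csv.QUOTE_NONNUMERIC) every non-numeric field must be a string.
for piece in _iter_xsv_chunks([["id", "value"], [1, "a"], [2, "b"]], delimiter="\t"):
    print(piece, end="")
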
def _initialize_index_file(self, index_file: str):
    """Initialize the index if it does not exist yet

    :param index_file: The name and path of the index.
    :type index_file: str
    """
    assert not os.path.exists(index_file), 'Cannot initialize, file already exists.'
    with open(index_file, 'wt') as out:
        writer = CsvWriter(out, delimiter='\t')
        writer.writerow(["Description", "Filename", "Location", "DateTime"])

def sentence_level_writer(
    writer: csv.writer,
    train_dataset: conlldataloader.ConllDataSet,
    start_id: int = 0,
) -> None:
    for i, data_point in enumerate(tqdm(train_dataset)):
        input_arr = data_point['input']
        input_word_arr = [
            point[constants.CONLL2003_WORD] for point in input_arr
        ]
        input_str = ' '.join(input_word_arr)
        input_id = start_id + i
        writer.writerow([input_id, input_str])

def calculate_ssim_across_two_lists(list_one: list, list_two: list, writer: csv.writer):
    num_images = len(list_one)
    for image_index in range(num_images):
        ssim_value = compare_ssim(list_one[image_index], list_two[image_index])
        sample, modality_one, tile = blk.file_name_parts(list_one[image_index])
        modality_two = blk.file_name_parts(list_two[image_index])[1]
        mouse, slide = sample.split('-')
        modality_pair = modality_one + '-' + modality_two
        writer.writerow([mouse, slide, tile, modality_pair, ssim_value])

def serialize(cls, value, schema, columns=None, path=None):
    if path:
        for key in path.split('.'):
            schema = schema.get(key)
            if schema and key in value:
                value = value[key]
            else:
                raise ValueError(path)

    if not isinstance(value, (list, tuple)):
        raise ValueError(value)
    if not isinstance(schema, Sequence):
        raise ValueError(schema)

    structure = schema.item
    if not isinstance(structure, Structure):
        raise ValueError(structure)

    if isinstance(columns, str):
        columns = columns.split(',')
    if not columns:
        columns = sorted(structure.structure.keys())

    cells = []
    for column in columns:
        field = structure.get(column)
        if field and not field.structural:
            cells.append((column, field))

    content = StringIO()
    writer = Writer(content, cls.Dialect)
    writer.writerow([cell[0] for cell in cells])

    for item in value:
        row = []
        for column, field in cells:
            candidate = item.get(column)
            if candidate is None:
                row.append('')
            elif field.type == 'boolean':
                row.append('true' if candidate else 'false')
            else:
                row.append(str(candidate))
        writer.writerow(row)

    return content.getvalue()

def _write_meta_information(self, writer: csv.writer) -> csv.writer:
    writer.writerow(["Corpus", "", self._corpus_name])
    writer.writerow(["User", "", self._author])
    writer.writerow(["Code", "", self._CODE])
    writer.writerow(["Download", "", self._DOWNLOAD])
    return writer

def csvify(data, status_code=200):
    if isinstance(data, list):
        fo = StringIO()
        writer = CSVWriter(fo)
        for row in data:
            writer.writerow(row)
        data = fo.getvalue()
    elif not isinstance(data, str):
        # `unicode` is Python 2-only; in Python 3, str covers all text.
        raise ValueError("Malformed CSV data")
    return Response(
        status_code=status_code,
        headers={'Content-Type': 'text/csv'},
        body=data,
    )

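# Hypothetical call into csvify above. Response is whichever HTTP response class the
# surrounding framework provides (it is not defined in this snippet), so only the
# shape of the call is shown: a list of rows is serialized, a string passes through.
rows = [["id", "name"], [1, "alpha"], [2, "beta"]]
response = csvify(rows)                     # list -> CSV text wrapped in a Response
response = csvify("id,name\n1,alpha\n")     # pre-rendered CSV string passes through
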
def traceroute(files, drop_private):
    """
    Convert traceroute results from ND-JSON to CSV.

    .. warning:: Late packets are dropped.

    Columns:
    ``timestamp, msm_id, prb_id, from_ip, to_ip, paris_id, hop1_1, ..., hop32_3``
    """
    tfip = TracerouteFlatIPTransformer(drop_dup=True, drop_late=True,
                                       drop_private=drop_private)
    # TODO: Proper context manager?
    output = open(f"traceroutes_{int(dt.datetime.now().timestamp())}.csv", "w")
    writer = CSVWriter(output)
    reader = AtlasRecordsReader.all(files)

    hops = [[f"hop{i}_{j}" for j in range(1, 4)] for i in range(1, 33)]
    writer.writerow([
        "timestamp", "msm_id", "prb_id", "from_ip", "to_ip", "paris_id",
        *itertools.chain(*hops),
    ])

    for record in tqdm(reader, desc=""):
        record = tfip(record)
        hops = record["hops"]
        assert all(len(hop) == 3 for hop in hops)
        # Pad hops
        if len(hops) < 32:
            hops += [[None] * 3] * (32 - len(hops))
        row = (
            record["timestamp"], record["msm_id"], record["prb_id"],
            record["from"], record["dst_addr"], record["paris_id"],
            *itertools.chain(*hops),
        )
        writer.writerow(row)

def write_serie(self, serie: pd.Series, periodicity: str, fields: dict, writer: csv.writer):
    field_id = fields[serie.name]
    # Filter out leading and trailing NaN values
    serie = serie[serie.first_valid_index():serie.last_valid_index()]
    df = serie.reset_index().apply(self.rows, axis=1,
                                   args=(self.fields_data, field_id, periodicity))
    serie = pd.Series(df.values, index=serie.index)
    for row in serie:
        writer.writerow(row)

def normalize_csv_body(is_body: bool, row: Sequence[str], csv_writer: csv.writer) -> None:
    if is_body:
        timestamp, address, zipcode, full_name, foo, bar, total, notes = row[:8]
        foo_duration = normalize_duration(foo, 'foo')
        bar_duration = normalize_duration(bar, 'bar')
        csv_writer.writerow([
            normalize_timestamp(timestamp),
            normalize_address(address),
            normalize_zipcode(zipcode),
            normalize_name(full_name),
            foo_duration,
            bar_duration,
            normalize_total_duration(foo_duration, bar_duration),
            normalize_notes(notes)
        ])

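# Sketch of the expected call pattern for normalize_csv_body above, assuming the
# normalize_* helpers it references exist elsewhere in the module. The file names
# and the header-skipping convention (first row is the header) are illustrative.
import csv

with open("input.csv", newline="") as _src, open("normalized.csv", "w", newline="") as _dst:
    _writer = csv.writer(_dst)
    for _index, _row in enumerate(csv.reader(_src)):
        normalize_csv_body(is_body=_index > 0, row=_row, csv_writer=_writer)
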
def write_verb_list(self, writer: csv.writer, data: list) -> int:
    """Sum up all the verbs for each key within a single sample in the data list of samples.
    This list is a list of dictionaries.

    Args:
        writer (csv.writer): CSV writer object
        data (list): List of dictionaries

    Returns:
        int: Number of found verbs
    """
    sub_key_list = list(data[0].keys())
    n_verbs_total = 0
    for sub_key in sub_key_list:
        n_verbs = self.count_words(data, sub_key)
        writer.writerow(["#{}".format(sub_key), n_verbs])
        n_verbs_total += n_verbs
    # Return the accumulated total rather than the count for the last key only.
    return n_verbs_total

def add_to_index(self, index_file: str = "../index.csv"):
    """Add current picture to the index.

    :param index_file: The name and path of the index file, defaults to "../index.csv"
    :type index_file: str, optional
    """
    if not os.path.exists(index_file):
        self._initialize_index_file(index_file)
    description = ''
    filename = os.path.basename(self.filename)
    location = self.filename
    try:
        date_and_time = self.DateTime.strftime('%Y:%m:%d %H:%M:%S')
    except AttributeError:
        date_and_time = ''
    with open(index_file, 'a') as out:
        writer = CsvWriter(out, delimiter='\t')
        writer.writerow([description, filename, location, date_and_time])

def main():
    args = mkArguments()
    image = io.imread(args.image)
    plt.imshow(image)

    points = []

    def onButton(event):
        x, y = int(event.xdata), int(event.ydata)
        points.append((x, y))
        print('Rectangle[{}]: at {},{}'.format(
            'start' if len(points) % 2 != 0 else 'end', x, y))

    def onKey(event):
        if event.key == 'q' and points:
            points.pop()
        elif event.key == 'w':
            exit('Error: discarding rectangles, label again')
        elif event.key == 'e':
            plt.close()

    plt.connect('button_press_event', onButton)
    plt.connect('key_press_event', onKey)
    plt.title('mouse press: add point, q: pop point, w: discard, e: next')
    plt.axis('off')
    plt.show()

    if not points or len(points) % 2 != 0:
        exit('Error: unable to make up rectangles from no or odd number of points')

    with open(args.labelFile, 'a') as handle:
        writer = CSVWriter(handle)
        for i in range(0, len(points) - 1, 2):
            p0, p1 = points[i], points[i + 1]
            (x0, y0) = np.min([p0, p1], axis=0)
            (x1, y1) = np.max([p0, p1], axis=0)
            # Region is made up of: path, x0, y0, x1, y1 (bottom left, top right)
            writer.writerow([abspath(args.image), x0, y0, x1, y1])

def Sample(self, writer: csv.writer, num_rows: int) -> None:
    """Generate num_rows random rows and write to given CSV."""
    # LifeCycle CSV file has whitespace after commas.
    writer.writerow([
        'START DATE(UTC)',
        ' END DATE(UTC)',
        ' START TIME(LOCAL)',
        ' END TIME(LOCAL)',
        ' DURATION',
        ' NAME',
        ' LOCATION',
        ' NOTE',
    ])
    writer.writerow([])
    start_time = self.start_time_seconds_since_epoch + random.randint(0, 3600 * 23)
    for _ in range(num_rows):
        start_time += random.randint(1, 3600)
        end_time = start_time + random.randint(60, 3600 * 8)
        writer.writerow([
            time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)),  # START DATE(UTC)
            time.strftime(' %Y-%m-%d %H:%M:%S', time.localtime(end_time)),  # END DATE(UTC)
            ' unused',  # START TIME(LOCAL)
            ' unused',  # END TIME(LOCAL)
            ' unused',  # DURATION
            ' ' + random.choice(self.names),  # NAME
            ' ' + random.choice(self.locations),  # LOCATION
            ' unused',  # NOTE
        ])

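# A throwaway driver for Sample above: the method only reads
# `start_time_seconds_since_epoch`, `names`, and `locations` from self, so a
# SimpleNamespace stands in for the real class here. All values and the output path
# are hypothetical, and `random` is assumed to be imported at module level.
import csv
import time
from types import SimpleNamespace

_fake_self = SimpleNamespace(
    start_time_seconds_since_epoch=int(time.time()) - 7 * 24 * 3600,
    names=['Reading', 'Running'],
    locations=['Home', 'Office'],
)
with open("lifecycle_sample.csv", "w", newline="") as _f:
    Sample(_fake_self, csv.writer(_f), num_rows=10)
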
def document_level_writer(
    writer: csv.writer,
    train_dataset: conlldataloader.ConllDataSet,
    start_id: int = 0,
) -> None:
    def is_document_start(input_word_arr: List[str]) -> bool:
        return len(input_word_arr) == 1 and input_word_arr[0] == '-DOCSTART-'

    curr_document = []
    document_id = start_id
    for i, data_point in enumerate(tqdm(train_dataset)):
        input_arr = data_point['input']
        input_word_arr = [
            point[constants.CONLL2003_WORD] for point in input_arr
        ]
        if is_document_start(input_word_arr):
            # Flush the document collected so far; the marker itself is not written.
            if len(curr_document) > 0:
                writer.writerow([document_id, ' '.join(curr_document)])
                document_id += 1
                curr_document = []
        else:
            input_str = ' '.join(input_word_arr)
            curr_document.append(input_str)
    # The final document is not followed by a -DOCSTART- marker, so flush it here.
    if curr_document:
        writer.writerow([document_id, ' '.join(curr_document)])

def __scan_directory_helper(self, parent_dir: str, dir_name: str, csv_writer: csv.writer) -> None:
    """
    Scans recursively a list of directories and stores filenames and directories in CSV format.

    :param str parent_dir: The name of the parent directory.
    :param str dir_name: The name of the directory.
    :param csv.writer csv_writer: The CSV writer.
    """
    target_name = os.path.join(parent_dir, dir_name) if dir_name else parent_dir

    first_file = True
    sub_dir_names = []
    for entry in os.scandir(target_name):
        if entry.is_file():
            self.__file_count += 1
            if first_file:
                first_file = False
                self.__entry_seq += 1
            csv_writer.writerow((self.__entry_seq, entry.inode(), dir_name, entry.name))
            if entry.name not in ['attrib', 'backupInfo', 'backuppc-clone.csv']:
                self.progress.advance()
        elif entry.is_dir():
            sub_dir_names.append(entry.name)

    for sub_dir_name in sorted(sub_dir_names):
        self.__entry_seq += 1
        self.__dir_count += 1
        csv_writer.writerow((self.__entry_seq, None, dir_name, sub_dir_name))
        self.__scan_directory_helper(parent_dir, os.path.join(dir_name, sub_dir_name), csv_writer)

def _write_data(self, writer: csv.writer, data: list, analysis_data: dict) -> csv.writer:
    writer.writerow(["Found", len(data)])
    writer.writerow(["\n"])
    writer.writerow(["Index", "Sample", "Text", "Year", "Country", "Unmatch"])
    # Write samples from the data list, 1-indexed.
    for idx, phrase in enumerate(data):
        writer.writerow([idx + 1, phrase['label'], phrase['text'], phrase['date'],
                         phrase['country'], analysis_data['Unmatch'][idx]])
    return writer

def MaybeTranscodeMp3(path: str, out_csv: csv.writer, max_bitrate: int = 192,
                      lame_option: int = 3) -> bool:
    """
    See https://trac.ffmpeg.org/wiki/Encode/MP3 for a table of variable bitrate values.
    """
    output = subprocess.check_output(["file", path], universal_newlines=True)
    match = re.search(r" (\d+) kbps", output)
    bit_rate = int(match.group(1))
    if bit_rate > max_bitrate:
        size_before = os.path.getsize(path)
        system.ProcessFileAndReplace(
            path,
            lambda inpath, outpath: TranscodeMp3(inpath, outpath, lame_option),
            tempfile_prefix="phd_system_machines_florence_transcode_musiclib_",
            tempfile_suffix=".mp3",
        )
        size_after = os.path.getsize(path)
        # app.Log takes a %-format string with lazy arguments, so no f-string here.
        app.Log(
            1,
            "%s changed from %s to %s (%.1f%% reduction)",
            os.path.basename(path),
            humanize.BinaryPrefix(size_before, "B"),
            humanize.BinaryPrefix(size_after, "B"),
            (1 - (size_after / size_before)) * 100,
        )
        out_csv.writerow(
            [datetime.datetime.now(), path, bit_rate, size_before, size_after])
        return True
    else:
        app.Log(2, "Ignoring %s", path)
        return False

def main():
    if len(argv) < 3:
        stderr.write(
            f"USAGE: {argv[0]} [path to road network] [path to emissions directory]\n"
        )
        exit(1)

    with open(argv[1], 'r', encoding='utf-8') as file:
        network = RoadNetwork(file)

    # A list comprehension is needed here: `[{}] * 24` would alias one dict across all 24 hours.
    result = [{} for _ in range(24)]
    for hour in range(0, 24):
        maps = {}
        for day in range(4, 11):
            with open(osp.join(argv[2], f'2017-07-{day:02d}_{hour:02d}_energy.csv'),
                      'r', encoding='utf-8') as file:
                emissions = EmissionsSnapshot.load(file)
            maps[day] = heatmap.comp_values(network, emissions)[0]
            for prev in range(4, day):
                result[hour][(prev, day)] = heatmap.comp_diff(maps[prev], maps[day])
                print(
                    f"Computed difference for hour {hour:02d}, days {prev:02d} and {day:02d}"
                )

    with open('data/heatmap_diffs.csv', 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = CSVWriter(csv_file)
        csv_writer.writerow(["Day 1", "Day 2", "Hour", "Difference"])
        for hour, diffs in enumerate(result):
            for day1, day2 in diffs.keys():
                csv_writer.writerow([day1, day2, hour, diffs[(day1, day2)]])

def __scan_directory_helper2(self, parent_dir: str, dir_name: str, csv_writer: csv.writer) -> None:
    """
    Scans recursively a list of directories and stores filenames and directories in CSV format.

    :param str parent_dir: The name of the parent directory.
    :param str dir_name: The name of the directory.
    :param csv.writer csv_writer: The CSV writer.
    """
    sub_dir_names = []
    for entry in os.scandir(os.path.join(parent_dir, dir_name)):
        if entry.is_file():
            self.__count += 1
            csv_writer.writerow((entry.inode(), dir_name, entry.name))
        elif entry.is_dir():
            sub_dir_names.append(entry.name)

    for sub_dir_name in sub_dir_names:
        self.__scan_directory_helper2(parent_dir, os.path.join(dir_name, sub_dir_name), csv_writer)

    self.__progress.advance()

def main():
    args = get_args()
    data = extract_columns(args, get_data(args))
    csv_writer = CsvWriter(sys.stdout)

    # Algorithm headers
    csv_writer.writerow(['row'] + flatten([algorithm] * count_columns(d)
                                          for algorithm, d in data.items()))
    # Stream size headers
    csv_writer.writerow(['row'] + flatten([size] * count_columns(d)
                                          for _, d in data.items()
                                          for size, d in d.items()))
    # Column headers
    csv_writer.writerow(['row \\ col'] + [col
                                          for _, d in data.items()
                                          for _, d in d.items()
                                          for col, d in first_value(d).items()])

    algorithms, sizes, rows, cols = get_keys(data)
    for row in rows:
        csv_writer.writerow([row] + [value
                                     for _, d in data.items()
                                     for _, d in d.items()
                                     for value in d[row].values()])

def writerow(writer: csv.writer, prompt_ids: List[str], current: Dict[str, Any]) -> None:
    writer.writerow([
        html.escape(json.dumps(prompt_ids)),
        html.escape(json.dumps(current))
    ])