def dump_to_csv(filepath, headings, data):
    # newline='' keeps csv.writer from emitting blank lines on Windows.
    with open(filepath, "w", newline="") as f:
        writer = Writer(f)
        writer.writerow(list(headings))
        for row in data:
            writer.writerow([row[key] for key in headings])
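A minimal usage sketch for the function above, assuming `Writer` is an alias for `csv.writer` and that `headings` lists keys present in every row dict; the file name and data are made up for illustration:

rows = [
    {"name": "Ada", "year": 1815},
    {"name": "Alan", "year": 1912},
]
dump_to_csv("people.csv", ["name", "year"], rows)
# people.csv:
# name,year
# Ada,1815
# Alan,1912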
Example #2
def output_for_curation(trait: Trait, curation_writer: csv.writer):
    """
    Write any non-finished Zooma or OxO mappings of a trait to a file for manual curation.
    Also outputs traits without any ontology mappings.

    :param trait: A Trait with no finished ontology mappings in finished_mapping_set
    :param curation_writer: A csv.writer to write non-finished ontology mappings for manual curation
    """

    # Traits which are associated with NT expansion variants are of highest importance, and they need to be curated
    # even if the number of records they are associated with is low.
    trait_frequency = 'NT expansion' if trait.associated_with_nt_expansion else trait.frequency
    output_row = [trait.name, trait_frequency]

    zooma_mapping_list = get_mappings_for_curation(trait.zooma_result_list)

    for zooma_mapping in zooma_mapping_list:
        cell = [zooma_mapping.uri, zooma_mapping.ontology_label, str(zooma_mapping.confidence),
                zooma_mapping.source, 'EFO_CURRENT' if zooma_mapping.in_efo else 'NOT_CONTAINED']
        output_row.append("|".join(cell))

    oxo_mapping_list = get_mappings_for_curation(trait.oxo_result_list)

    for oxo_mapping in oxo_mapping_list:
        cell = [str(oxo_mapping.uri), oxo_mapping.ontology_label, str(oxo_mapping.distance),
                oxo_mapping.query_id, 'EFO_CURRENT' if oxo_mapping.in_efo else 'NOT_CONTAINED']
        output_row.append("|".join(cell))

    curation_writer.writerow(output_row)
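Each appended cell packs five mapping fields into one pipe-separated string, so a curation tool can split them back apart. A small sketch with a made-up Zooma cell (the URI, label, confidence, and source values are illustrative only):

# Hypothetical cell taken from the curation file; split order matches the join above.
cell = "http://www.ebi.ac.uk/efo/EFO_0000400|diabetes mellitus|HIGH|cttv|EFO_CURRENT"
uri, label, confidence, source, efo_status = cell.split("|")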
Example #3
def MaybeTranscodeMp3(path: str,
                      out_csv: csv.writer,
                      max_bitrate: int = 192,
                      lame_option: int = 3) -> bool:
    """

  See https://trac.ffmpeg.org/wiki/Encode/MP3 for a table of variable bitrate
  values.
  """
    output = subprocess.check_output(['file', path], universal_newlines=True)
    match = re.search(r' (\d+) kbps', output)
    bit_rate = int(match.group(1))

    if bit_rate > max_bitrate:
        size_before = os.path.getsize(path)
        system.ProcessFileAndReplace(
            path,
            lambda inpath, outpath: TranscodeMp3(inpath, outpath, lame_option),
            tempfile_prefix='phd_system_machines_florence_transcode_musiclib_',
            tempfile_suffix='.mp3')
        size_after = os.path.getsize(path)
        logging.info('%s changed from %s to %s (%.1f%% reduction)',
                     os.path.basename(path), humanize.naturalsize(size_before),
                     humanize.naturalsize(size_after),
                     (1 - (size_after / size_before)) * 100)
        out_csv.writerow(
            [datetime.datetime.now(), path, bit_rate, size_before, size_after])
        return True
    else:
        logging.debug('Ignoring %s', path)
        return False
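The bitrate detection above relies on `file` reporting a "kbps" figure. A quick sketch of that parsing step against a made-up `file` output line; the exact output format varies across platforms, so `match` can be `None` in practice:

import re

# Hypothetical `file` output for a constant-bitrate MP3.
sample = "song.mp3: Audio file with ID3 version 2.3.0, layer III, v1, 320 kbps, 44.1 kHz, Stereo"
match = re.search(r' (\d+) kbps', sample)
if match:
    print(int(match.group(1)))  # 320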
Example #4
def output_for_curation(trait: Trait, curation_writer: csv.writer):
    """
    Write any non-finished Zooma or OxO mappings of a trait to a file for manual curation.
    Also outputs traits without any ontology mappings.

    :param trait: A Trait with no finished ontology mappings in finished_mapping_set
    :param curation_writer: A csv.writer to write non-finished ontology mappings for manual curation
    """
    output_row = [trait.name, trait.frequency]

    zooma_mapping_list = get_mappings_for_curation(trait.zooma_result_list)

    for zooma_mapping in zooma_mapping_list:
        cell = [zooma_mapping.uri, zooma_mapping.ontology_label, str(zooma_mapping.confidence),
                zooma_mapping.source, 'EFO_CURRENT' if zooma_mapping.in_efo else 'NOT_CONTAINED']
        output_row.append("|".join(cell))

    oxo_mapping_list = get_mappings_for_curation(trait.oxo_result_list)

    for oxo_mapping in oxo_mapping_list:
        cell = [str(oxo_mapping.uri), oxo_mapping.ontology_label, str(oxo_mapping.distance),
                oxo_mapping.query_id, 'EFO_CURRENT' if oxo_mapping.in_efo else 'NOT_CONTAINED']
        output_row.append("|".join(cell))

    curation_writer.writerow(output_row)
Example #5
def parse_results(results: dict, writer: csv.writer) -> None:
    """
    Given a dict of results from the news-api client. Parse through each article and
    use the writer to write a row to the csv.
    """
    for article in results:

        try:
            source = article['source']
        except KeyError:
            print("KeyError: 'source' not in article")
            source_id, source_name = '', ''
        else:
            source_id = source.get('id', '')
            source_name = source.get('name', '')

        author = article.get('author', '')
        title = article.get('title', '')
        content = article.get('content', '')
        url = article.get('url', '')
        published_at = article.get('publishedAt', '')

        writer.writerow([
            source_id, source_name, author, title, content, url, published_at
        ])
        COUNTER[0] += 1
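A hedged usage sketch: the function expects an iterable of article dicts shaped like news-api response entries, and it assumes a module-level counter such as `COUNTER = [0]` is already defined in the same module:

import csv
import io

# Hypothetical single-article payload mirroring a news-api response entry.
articles = [{
    "source": {"id": "example", "name": "Example News"},
    "author": "A. Writer",
    "title": "Headline",
    "content": "Body text",
    "url": "https://example.com/story",
    "publishedAt": "2021-01-01T00:00:00Z",
}]
buffer = io.StringIO()
parse_results(articles, csv.writer(buffer))
print(buffer.getvalue())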
Example #6
def log_record_combiner(output_writer: writer,
                        input_reader: reader,
                        header_row: Optional[Sequence[str]] = None) -> bool:
    """Append the records from input_reader to output_writer.

    If header_row is given, the first record of input_reader is consumed and
    must equal it; otherwise nothing is written and False is returned.
    """
    res = False
    if not header_row or next(input_reader) == header_row:
        output_writer.writerows(input_reader)
        res = True
    return res
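A small usage sketch, assuming `writer` and `reader` are the `csv.writer` and `csv.reader` callables and that both files already exist; the header of the incoming file is consumed and checked before its remaining rows are appended:

import csv

# Hypothetical file names and header for illustration.
header = ["timestamp", "level", "message"]
with open("combined.csv", "a", newline="") as out_f, \
        open("part.csv", newline="") as in_f:
    ok = log_record_combiner(csv.writer(out_f), csv.reader(in_f), header)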
Example #7
def output_trait_mapping(trait: Trait, mapping_writer: csv.writer):
    """
    Write any finished ontology mappings for a trait to a csv file writer.

    :param trait: A trait with finished ontology mappings in finished_mapping_set
    :param mapping_writer: A csv.writer to write the finished mappings
    """
    for ontology_entry in trait.finished_mapping_set:
        mapping_writer.writerow([trait.name, ontology_entry.uri, ontology_entry.label])
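A brief calling sketch; the tab delimiter and file name are assumptions for illustration, and `trait` stands for a hypothetical Trait instance whose finished_mapping_set is populated:

import csv

with open("finished_mappings.tsv", "w", newline="") as f:
    output_trait_mapping(trait, csv.writer(f, delimiter="\t"))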
Example #8
def write_events_to_csv(output_file, calendar, property_names):
    writer = CSVWriter(output_file)
    writer.writerow(property_names)
    for event in get_events(calendar):
        props = []
        for property_name in property_names:
            property_value = get_property_value(event, property_name)
            props.append(property_value)
        writer.writerow(props)
Example #9
    def _write_table_headers(self, writer: csv.writer):
        """
        Writes the headers of the data table to the CSV file.

        :param writer: The ``csv.writer`` object
        """
        elems = self.soup.select(".table-scroll thead tr th")
        headers = [e.getText() for e in elems]
        writer.writerow(headers)
Example #10
def write_header(results: dict, csvwr: csv.writer, values: int):
    """Write a header row where each level/algorithm label spans `values` columns."""
    row = [""]
    for lvl, algs in results.items():
        for alg in algs.keys():
            header = [lvl + "/" + alg]
            header.extend([""] * (values - 1))
            row.extend(header)

    csvwr.writerow(row)
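A worked sketch of the header layout: after a leading empty cell, each level/algorithm label is followed by `values - 1` empty cells so that it spans a block of `values` columns. The results dict below is made up for illustration:

import csv
import sys

results = {"L1": {"bfs": {}}, "L2": {"dfs": {}}}
write_header(results, csv.writer(sys.stdout), values=3)
# Output: ,L1/bfs,,,L2/dfs,,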
Example #11
    def _write_table_rows(self, writer: csv.writer):
        """
        Iterates through the rows of the current data table.
        The data in each row is written to the CSV file.

        :param writer: The ``csv.writer`` object
        """
        row_elems = self.soup.select(".table-scroll tbody tr")
        for row in row_elems:
            elems = row.select("td")
            items = [e.getText() for e in elems]
            writer.writerow(items)
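Taken together, the two `_write_table_*` helpers simply mirror the page's `.table-scroll` markup. A self-contained sketch with made-up HTML that reproduces the same selectors outside the class:

import csv
import sys
from bs4 import BeautifulSoup

# Hypothetical markup matching the ".table-scroll" selectors used above.
html = """
<div class="table-scroll"><table>
  <thead><tr><th>Name</th><th>Score</th></tr></thead>
  <tbody><tr><td>Ada</td><td>10</td></tr></tbody>
</table></div>
"""
soup = BeautifulSoup(html, "html.parser")
writer = csv.writer(sys.stdout)
writer.writerow([e.getText() for e in soup.select(".table-scroll thead tr th")])
for row in soup.select(".table-scroll tbody tr"):
    writer.writerow([e.getText() for e in row.select("td")])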
Example #12

def _iter_xsv_chunks(chunks, prefix="", delimiter=",", quoting=2, lineterminator=None):
    """Iterate chunks in `delimiter`-separated format"""
    # quoting=2 corresponds to csv.QUOTE_NONNUMERIC.
    fmtparams = dict(delimiter=delimiter, quoting=quoting)
    if lineterminator:
        fmtparams["lineterminator"] = lineterminator
    with StringIO() as handle:
        writer = CSVWriter(handle, **fmtparams)
        for chunk in chunks:
            writer.writerow(chunk)
            handle.seek(0)
            yield prefix + handle.getvalue()
            handle.truncate()
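A quick usage sketch, assuming `CSVWriter` is an alias for `csv.writer`; with the default `quoting=2` (csv.QUOTE_NONNUMERIC) every string field is quoted, and the default line terminator is `\r\n`:

chunks = [["a", "b"], ["c", "d"]]
print(list(_iter_xsv_chunks(chunks, prefix="> ")))
# ['> "a","b"\r\n', '> "c","d"\r\n']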
Example #13
def sentence_level_writer(
    writer: csv.writer,
    train_dataset: conlldataloader.ConllDataSet,
    start_id: int = 0,
) -> None:
    for i, data_point in enumerate(tqdm(train_dataset)):
        input_arr = data_point['input']
        input_word_arr = [
            point[constants.CONLL2003_WORD] for point in input_arr
        ]
        input_str = ' '.join(input_word_arr)
        input_id = start_id + i
        writer.writerow([input_id, input_str])
Example #14
def calculate_ssim_across_two_lists(list_one: list, list_two: list,
                                    writer: csv.writer):
    num_images = len(list_one)
    for image_index in range(num_images):
        ssim_value = compare_ssim(list_one[image_index], list_two[image_index])

        sample, modality_one, tile = blk.file_name_parts(list_one[image_index])
        modality_two = blk.file_name_parts(list_two[image_index])[1]

        mouse, slide = sample.split('-')
        modality_pair = modality_one + '-' + modality_two

        writer.writerow([mouse, slide, tile, modality_pair, ssim_value])
Example #15
    def _initialize_index_file(self, index_file: str):
        """Initialize the index if it does not exist yet

        :param index_file: The name and path of the index.
        :type index_file: str
        """
        assert not os.path.exists(
            index_file), 'Cannot initialize, file already exists.'

        with open(index_file, 'wt') as out:
            writer = CsvWriter(out, delimiter='\t')
            writer.writerow(
                ["Description", "Filename", "Location", "DateTime"])
Example #16
    def serialize(cls, value, schema, columns=None, path=None):
        if path:
            for key in path.split('.'):
                schema = schema.get(key)
                if schema and key in value:
                    value = value[key]
                else:
                    raise ValueError(path)

        if not isinstance(value, (list, tuple)):
            raise ValueError(value)
        if not isinstance(schema, Sequence):
            raise ValueError(schema)

        structure = schema.item
        if not isinstance(structure, Structure):
            raise ValueError(structure)

        if isinstance(columns, string):
            columns = columns.split(',')
        if not columns:
            columns = sorted(structure.structure.keys())

        cells = []
        for column in columns:
            field = structure.get(column)
            if field and not field.structural:
                cells.append((column, field))

        content = StringIO()
        writer = Writer(content, cls.Dialect)

        writer.writerow([cell[0] for cell in cells])
        for item in value:
            row = []
            for column, field in cells:
                candidate = item.get(column)
                if candidate is None:
                    row.append('')
                elif field.type == 'boolean':
                    if candidate:
                        row.append('true')
                    else:
                        row.append('false')
                else:
                    row.append(str(candidate))
            writer.writerow(row)

        return content.getvalue()
Example #17
    def _write_meta_information(self, writer: csv.writer) -> csv.writer:
        writer.writerow(["Corpus", "", self._corpus_name])
        writer.writerow(["User", "", self._author])
        writer.writerow(["Code", "", self._CODE])
        writer.writerow(["Download", "", self._DOWNLOAD])

        return writer
Example #18
def csvify(data, status_code=200):
    if isinstance(data, list):
        fo = StringIO()
        writer = CSVWriter(fo)
        for row in data:
            writer.writerow(row)
        data = fo.getvalue()
    elif not isinstance(data, str):
        raise ValueError("Malformed CSV data")
    return Response(
        status_code=status_code,
        headers={'Content-Type': 'text/csv'},
        body=data,
    )
Example #19
def traceroute(files, drop_private):
    """
    Convert traceroute results from ND-JSON to CSV.

    .. warning::
        Late packets are dropped.

    Columns: ``timestamp, msm_id, prb_id, from_ip, to_ip, paris_id, hop1_1, ..., hop32_3``
    """
    tfip = TracerouteFlatIPTransformer(drop_dup=True,
                                       drop_late=True,
                                       drop_private=drop_private)

    # TODO: Proper context manager?
    output = open(f"traceroutes_{int(dt.datetime.now().timestamp())}.csv", "w")
    writer = CSVWriter(output)
    reader = AtlasRecordsReader.all(files)

    hops = [[f"hop{i}_{j}" for j in range(1, 4)] for i in range(1, 33)]
    writer.writerow([
        "timestamp",
        "msm_id",
        "prb_id",
        "from_ip",
        "to_ip",
        "paris_id",
        *itertools.chain(*hops),
    ])

    for record in tqdm(reader, desc=""):
        record = tfip(record)

        hops = record["hops"]
        assert all(len(hop) == 3 for hop in hops)

        # Pad hops
        if len(hops) < 32:
            hops += [[None] * 3] * (32 - len(hops))

        row = (
            record["timestamp"],
            record["msm_id"],
            record["prb_id"],
            record["from"],
            record["dst_addr"],
            record["paris_id"],
            *itertools.chain(*hops),
        )
        writer.writerow(row)
Example #20

    def write_serie(self, serie: pd.Series, periodicity: str, fields: dict,
                    writer: csv.writer):
        field_id = fields[serie.name]

        # Trim leading and trailing NaN values
        serie = serie[serie.first_valid_index():serie.last_valid_index()]

        df = serie.reset_index().apply(self.rows,
                                       axis=1,
                                       args=(self.fields_data, field_id,
                                             periodicity))

        serie = pd.Series(df.values, index=serie.index)
        for row in serie:
            writer.writerow(row)
Example #21
def normalize_csv_body(is_body: bool, row: Sequence[str],
                       csv_writer: csv.writer) -> None:
    if is_body:
        timestamp, address, zipcode, full_name, foo, bar, total, notes = \
            row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7]

        foo_duration = normalize_duration(foo, 'foo')
        bar_duration = normalize_duration(bar, 'bar')

        csv_writer.writerow([
            normalize_timestamp(timestamp),
            normalize_address(address),
            normalize_zipcode(zipcode),
            normalize_name(full_name), foo_duration, bar_duration,
            normalize_total_duration(foo_duration, bar_duration),
            normalize_notes(notes)
        ])
Example #22
    def add_to_index(self, index_file: str = "../index.csv"):
        """Add current picture to the index.

        :param index_file: The name and path of the index file, defaults to "../index.csv"
        :type index_file: str, optional
        """
        if not os.path.exists(index_file):
            self._initialize_index_file(index_file)

        description = ''
        filename = os.path.basename(self.filename)
        location = self.filename
        try:
            date_and_time = self.DateTime.strftime('%Y:%m:%d %H:%M:%S')
        except AttributeError:
            date_and_time = ''

        with open(index_file, 'a') as out:
            writer = CsvWriter(out, delimiter='\t')
            writer.writerow([description, filename, location, date_and_time])
Example #23
    def write_verb_list(self, writer: csv.writer, data: list) -> int:
        """Sum up all the verbs for each key within a single sample in the
           data list of samples. This list is a list of dictionaries.

        Args:
            writer (csv.writer): CSV writer object
            data (list): List of dictionaries

        Returns:
            int: Number of found verbs
        """
        sub_key_list = list(data[0].keys())

        n_verbs_total = 0
        for sub_key in sub_key_list:
            n_verbs = self.count_words(data, sub_key)
            writer.writerow(["#{}".format(sub_key), n_verbs])
            n_verbs_total += n_verbs

        return n_verbs_total
Example #24
def main():
    args = mkArguments()

    image = io.imread(args.image)

    plt.imshow(image)

    points = []

    def onButton(event):
        x, y = int(event.xdata), int(event.ydata)
        points.append((x, y))
        print('Rectangle[{}]: at {},{}'.format('start' if len(points) % 2 != 0 else 'end', x, y))

    def onKey(event):
        if event.key == 'q' and points:
            points.pop()
        elif event.key == 'w':
            exit('Error: discarding rectangles, label again')
        elif event.key == 'e':
            plt.close()

    plt.connect('button_press_event', onButton)
    plt.connect('key_press_event', onKey)

    plt.title('mouse press: add point, q: pop point, w: discard, e: next')
    plt.axis('off')
    plt.show()

    if not points or len(points) % 2 != 0:
        exit('Error: unable to make up rectangles from no or odd number of points')

    with open(args.labelFile, 'a') as handle:
        writer = CSVWriter(handle)
        for i in range(0, len(points) - 1, 2):
            p0, p1 = points[i], points[i+1]
            (x0, y0) = np.min([p0, p1], axis=0)
            (x1, y1) = np.max([p0, p1], axis=0)

            # Region is made up of: path, x0, y0, x1, y1 (bottom left, top right)
            writer.writerow([abspath(args.image), x0, y0, x1, y1])
Example #25
    def Sample(self, writer: csv.writer, num_rows: int) -> None:
        """Generate num_rows random rows and write to given CSV."""
        # LifeCycle CSV file has whitespace after commas.
        writer.writerow([
            'START DATE(UTC)',
            ' END DATE(UTC)',
            ' START TIME(LOCAL)',
            ' END TIME(LOCAL)',
            ' DURATION',
            ' NAME',
            ' LOCATION',
            ' NOTE',
        ])
        writer.writerow([])
        start_time = self.start_time_seconds_since_epoch + random.randint(
            0, 3600 * 23)
        for _ in range(num_rows):
            start_time += random.randint(1, 3600)
            end_time = start_time + random.randint(60, 3600 * 8)

            writer.writerow([
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(start_time)),  # START DATE(UTC)
                time.strftime(' %Y-%m-%d %H:%M:%S',
                              time.localtime(end_time)),  # END DATE(UTC)
                ' unused',  # START TIME(LOCAL)
                ' unused',  # END TIME(LOCAL)
                ' unused',  # DURATION
                ' ' + random.choice(self.names),  # NAME
                ' ' + random.choice(self.locations),  # LOCATION
                ' unused',  # NOTE
            ])
Example #26
    def __scan_directory_helper(self, parent_dir: str, dir_name: str,
                                csv_writer: csv.writer) -> None:
        """
        Scans recursively a list of directories and stores filenames and directories in CSV format.

        :param str parent_dir: The name of the parent directory.
        :param str dir_name: The name of the directory.
        :param csv.writer csv_writer: The CSV writer.
        """
        target_name = os.path.join(parent_dir,
                                   dir_name) if dir_name else parent_dir

        first_file = True
        sub_dir_names = []
        for entry in os.scandir(target_name):
            if entry.is_file():
                self.__file_count += 1
                if first_file:
                    first_file = False
                    self.__entry_seq += 1
                csv_writer.writerow(
                    (self.__entry_seq, entry.inode(), dir_name, entry.name))

                if entry.name not in [
                        'attrib', 'backupInfo', 'backuppc-clone.csv'
                ]:
                    self.progress.advance()

            elif entry.is_dir():
                sub_dir_names.append(entry.name)

        for sub_dir_name in sorted(sub_dir_names):
            self.__entry_seq += 1
            self.__dir_count += 1
            csv_writer.writerow(
                (self.__entry_seq, None, dir_name, sub_dir_name))
            self.__scan_directory_helper(parent_dir,
                                         os.path.join(dir_name, sub_dir_name),
                                         csv_writer)
Example #27
def document_level_writer(
    writer: csv.writer,
    train_dataset: conlldataloader.ConllDataSet,
    start_id: int = 0,
) -> None:
    def is_document_start(input_word_arr: List[str]) -> bool:
        return len(input_word_arr) == 1 and input_word_arr[0] == '-DOCSTART-'

    curr_document = []
    document_id = start_id
    for data_point in tqdm(train_dataset):
        input_arr = data_point['input']
        input_word_arr = [
            point[constants.CONLL2003_WORD] for point in input_arr
        ]
        if is_document_start(input_word_arr):
            # Start a new document: flush the previous one (if any) without
            # keeping the '-DOCSTART-' marker itself.
            if len(curr_document) > 0:
                writer.writerow([document_id, ' '.join(curr_document)])
                document_id += 1
                curr_document = []
        else:
            curr_document.append(' '.join(input_word_arr))

    # Flush the final document, which is not followed by another '-DOCSTART-'.
    if curr_document:
        writer.writerow([document_id, ' '.join(curr_document)])
Example #28
    def _write_data(self, writer: csv.writer, data: list, analysis_data: dict) -> csv.writer:
        writer.writerow(["Found", len(data)])
        writer.writerow(["\n"])
        writer.writerow(["Index", "Sample", "Text",
                         "Year", "Country", "Unmatch"])

        # Write samples from data list
        for idx, phrase in enumerate(data):
            # Write data 1-indexed
            writer.writerow([idx+1,
                             phrase['label'],
                             phrase['text'],
                             phrase['date'],
                             phrase['country'],
                             analysis_data['Unmatch'][idx]])
        return writer
Example #29

def main():
    if len(argv) < 3:
        stderr.write(
            f"USAGE: {argv[0]} [path to road network] [path to emissions directory]\n"
        )
        exit(1)

    with open(argv[1], 'r', encoding='utf-8') as file:
        network = RoadNetwork(file)

    result = [{} for _ in range(24)]  # one independent dict per hour, not 24 aliases of one dict
    for hour in range(0, 24):
        maps = {}
        for day in range(4, 11):
            with open(osp.join(argv[2],
                               f'2017-07-{day:02d}_{hour:02d}_energy.csv'),
                      'r',
                      encoding='utf-8') as file:
                emissions = EmissionsSnapshot.load(file)

            maps[day] = heatmap.comp_values(network, emissions)[0]
            for prev in range(4, day):
                result[hour][(prev,
                              day)] = heatmap.comp_diff(maps[prev], maps[day])
                print(
                    f"Computed difference for hour {hour:02d}, days {prev:02d} and {day:02d}"
                )

    with open('data/heatmap_diffs.csv', 'w', newline='',
              encoding='utf-8') as csv_file:
        csv_writer = CSVWriter(csv_file)
        csv_writer.writerow(["Day 1", "Day 2", "Hour", "Difference"])

        for hour, diffs in enumerate(result):
            for day1, day2 in diffs.keys():
                csv_writer.writerow([day1, day2, hour, diffs[(day1, day2)]])
Example #30
def MaybeTranscodeMp3(path: str,
                      out_csv: csv.writer,
                      max_bitrate: int = 192,
                      lame_option: int = 3) -> bool:
    """

  See https://trac.ffmpeg.org/wiki/Encode/MP3 for a table of variable bitrate
  values.
  """
    output = subprocess.check_output(["file", path], universal_newlines=True)
    match = re.search(r" (\d+) kbps", output)
    bit_rate = int(match.group(1))

    if bit_rate > max_bitrate:
        size_before = os.path.getsize(path)
        system.ProcessFileAndReplace(
            path,
            lambda inpath, outpath: TranscodeMp3(inpath, outpath, lame_option),
            tempfile_prefix="phd_system_machines_florence_transcode_musiclib_",
            tempfile_suffix=".mp3",
        )
        size_after = os.path.getsize(path)
        app.Log(
            1,
            f"%s changed from %s to %s (%.1f%% reduction)",
            os.path.basename(path),
            humanize.BinaryPrefix(size_before, "B"),
            humanize.BinaryPrefix(size_after, "B"),
            (1 - (size_after / size_before)) * 100,
        )
        out_csv.writerow(
            [datetime.datetime.now(), path, bit_rate, size_before, size_after])
        return True
    else:
        app.Log(2, "Ignoring %s", path)
        return False
Example #31
    def __scan_directory_helper2(self, parent_dir: str, dir_name: str,
                                 csv_writer: csv.writer) -> None:
        """
        Scans recursively a list of directories and stores filenames and directories in CSV format.

        :param str parent_dir: The name of the parent directory.
        :param str dir_name: The name of the directory.
        :param csv.writer csv_writer: The CSV writer.
        """
        sub_dir_names = []
        for entry in os.scandir(os.path.join(parent_dir, dir_name)):
            if entry.is_file():
                self.__count += 1
                csv_writer.writerow((entry.inode(), dir_name, entry.name))

            elif entry.is_dir():
                sub_dir_names.append(entry.name)

        for sub_dir_name in sub_dir_names:
            self.__scan_directory_helper2(parent_dir,
                                          os.path.join(dir_name, sub_dir_name),
                                          csv_writer)

        self.__progress.advance()
Example #32
def main():
    args = get_args()
    data = extract_columns(args, get_data(args))

    csv_writer = CsvWriter(sys.stdout)

    # Algorithms headers
    csv_writer.writerow(['row'] + flatten([algorithm] * count_columns(d)
                                          for algorithm, d in data.items()))
    # Stream Size Headers
    csv_writer.writerow(['row'] + flatten([size] * count_columns(d)
                                          for _, d in data.items()
                                          for size, d in d.items()))
    # Column Headers
    csv_writer \
        .writerow(['row \\ col'] + [col for _, d in data.items()
                                       for _, d in d.items()
                                       for col, d in first_value(d).items()])

    algorithms, sizes, rows, cols = get_keys(data)
    for row in rows:
        csv_writer.writerow([row] + [value for _, d in data.items()
                                           for _, d in d.items()
                                           for value in d[row].values()])
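The last example leans on a `flatten` helper that is not shown here; a plausible minimal stand-in (an assumption, not the original implementation) would be:

import itertools

def flatten(iterable_of_lists):
    # Concatenate an iterable of lists into a single flat list.
    return list(itertools.chain.from_iterable(iterable_of_lists))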