def __str__(self):
    output = csv.StringIO()
    header_output = csv.StringIO()
    writer = csv.DictWriter(output, fieldnames=WF_FIELDNAMES, dialect="wordfast")
    unit_count = 0
    for unit in self.units:
        if unit.istranslated():
            unit_count += 1
            writer.writerow(unit.dict)
    if unit_count == 0:
        return ""
    output.reset()
    self.header.tucount = unit_count
    outheader = csv.DictWriter(header_output, fieldnames=WF_FIELDNAMES_HEADER,
                               dialect="wordfast")
    outheader.writerow(self.header.header)
    header_output.reset()
    decoded = "".join(header_output.readlines() + output.readlines()).decode('utf-8')
    try:
        return decoded.encode(self._encoding)
    except UnicodeEncodeError:
        return decoded.encode('utf-16')

def parse(self, response):
    csv_file = csv.StringIO(response.body)
    for row in csv.reader(csv_file):
        meta = {}
        loader = ProductLoader(item=Product(), selector="")
        loader.add_value('identifier', row[0])
        loader.add_value('name', row[1].decode('iso-8859-15'))
        author = row[3].decode('iso-8859-15')
        loader.add_value('price', row[4] or '0')
        loader.add_value('category', row[5].decode('iso-8859-15'))
        loader.add_value('category', row[6].decode('iso-8859-15'))
        loader.add_value('url', row[8])
        loader.add_value('image_url', row[9])
        publisher = row[12].decode('iso-8859-15')
        loader.add_value('sku', row[21].decode('iso-8859-15'))
        loader.add_value('brand', publisher)
        product = loader.load_item()
        if author:
            meta['author'] = author
        if publisher:
            meta['publisher'] = publisher
        product['metadata'] = meta
        yield product

def parse(self, csvsrc):
    csvfile = csv.StringIO(csvsrc)
    reader = SimpleDictReader(csvfile, self.fieldnames)
    for row in reader:
        newce = self.UnitClass()
        newce.fromdict(row)
        self.addunit(newce)

def csv_to_dict(content, evaluate=True):
    """
    Create dictionary representation of csv content.
    """
    delimiter = ';'
    quotechar = '"'
    key = 'Sources / Destinations'
    C = '#'
    e = lambda s: s
    if evaluate:
        e = lambda s: eval(s, globals(), comedi.__dict__)
    F = csv.StringIO(content)
    dR = csv.DictReader(F, delimiter=delimiter, quotechar=quotechar)
    D = {
        e(r[key]): {
            e(f) for f, c in r.items()
            if f != key and f[:1] not in ['', C] and c.strip()[:1] == '1'
        }
        for r in dR
        if r[key][:1] not in ['', C]
    }
    # now, go back through and eliminate all entries with empty destination sets
    D = {k: v for k, v in D.items() if v}
    return D

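# A minimal usage sketch of csv_to_dict above, with evaluate=False so the
# comedi symbol table is not needed (the column names are hypothetical):
#
#   content = ('"Sources / Destinations";"dest_a";"dest_b"\n'
#              '"src_0";"1";"0"\n'
#              '"src_1";"0";"1"\n')
#   csv_to_dict(content, evaluate=False)
#   # -> {'src_0': {'dest_a'}, 'src_1': {'dest_b'}}
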
def _dump_spec(self):
    buf = csv.StringIO()
    writer = csv.DictWriter(buf, fieldnames=[
        "uncompressed_length", "section_length", "section_index",
        "compressed", "section_offset"
    ])
    writer.writeheader()
    for i in range(len(self.sections)):
        writer.writerow({
            "uncompressed_length": self.section_uncompressed_lengths[i],
            "section_length": self.section_lengths[i],
            "section_index": self.section_indices[i],
            "compressed": self.section_compressed[i],
            "section_offset": self.section_offsets[i]
        })
    return buf.getvalue().encode('ascii')

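# For illustration, _dump_spec above produces ASCII bytes shaped like this
# (the values are hypothetical; only the header row is fixed by the code):
#
#   uncompressed_length,section_length,section_index,compressed,section_offset
#   4096,1024,0,1,0
#   4096,980,1,1,1024
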
def parse(self, response):
    page = response.text
    startdate = self.startdate
    enddate = self.enddate
    startindex = None
    endindex = None
    if page[-2] != '=':
        list_contents = []
        series_contents = pd.Series()
        page = csv.StringIO(page)
        page = csv.reader(page)
        for line in page:
            if len(line) == 6:
                list_contents.append(line)
        # The first data row starts with an 'a'-prefixed UNIX timestamp;
        # skip any preamble rows before it.
        for i in range(10):
            if list_contents[i][0][0] == 'a':
                startpoint = i
                break
        list_contents = list_contents[startpoint:]
        for i in list_contents:
            if i[0][0] == 'a':
                # Absolute row: 'a<timestamp>' sets a new base date.
                stamp = int(i[0][1:])
                date = datetime.datetime.fromtimestamp(stamp).date()
                i[0] = str(date)
                series_contents[i[0]] = i[1:]
            else:
                # Relative row: an offset in days from the last base date.
                i[0] = str(date + datetime.timedelta(int(i[0])))
                series_contents[i[0]] = i[1:]
        for i in range(300):
            datecheck = str(startdate + datetime.timedelta(i))
            if datecheck in series_contents.keys():
                startindex = datecheck
                break
        for i in range(300):
            datecheck = str(enddate - datetime.timedelta(i))
            if datecheck in series_contents.keys():
                endindex = datecheck
                break
        if (startindex is not None) and (endindex is not None):
            for i in series_contents[startindex:endindex].keys():
                yield StockDailyPriceItem(
                    symbol=response.meta['symbol'],
                    exchange_symbol=response.meta['exchange_symbol'],
                    date=i,
                    open=series_contents[i][3],
                    close=series_contents[i][0],
                    high=series_contents[i][1],
                    low=series_contents[i][2],
                    volume=series_contents[i][4],
                )

def serialize(self, out):
    output = csv.StringIO()
    writer = csv_utils.UnicodeDictWriter(output, FIELDNAMES,
                                         encoding=self.encoding,
                                         dialect="catkeys")
    # No real headers, the first line contains metadata
    writer.writerow(dict(zip(FIELDNAMES,
                             [self.header._header_dict[key]
                              for key in FIELDNAMES_HEADER])))
    for unit in self.units:
        writer.writerow(unit.dict)
    out.write(output.getvalue() if six.PY2
              else output.getvalue().encode(self.encoding))

def getoutput(self):
    csvfile = csv.StringIO()
    writer = csv.DictWriter(csvfile, self.fieldnames)
    for ce in self.units:
        cedict = ce.todict()
        writer.writerow(cedict)
    csvfile.reset()
    return "".join(csvfile.readlines())

def serialize(self):
    output = csv.StringIO()
    writer = csv.DictWriter(output, fieldnames=FIELDNAMES_HEADER, dialect="catkeys")
    writer.writerow(self.header._header_dict)
    writer = csv.DictWriter(output, fieldnames=FIELDNAMES, dialect="catkeys")
    for unit in self.units:
        writer.writerow(unit.dict)
    return output.getvalue()

def getoutput(self):
    output = csv.StringIO()
    writer = csv.DictWriter(output, self.fieldnames,
                            extrasaction="ignore", dialect=self.dialect)
    writer.writeheader()
    for ce in self.units:
        writer.writerow(ce.todict())
    return output.getvalue()

def getoutput(self):
    output = csv.StringIO()
    writer = csv_utils.UnicodeDictWriter(output, self.fieldnames,
                                         encoding=self.encoding,
                                         extrasaction='ignore',
                                         dialect=self.dialect)
    # writeheader() would need Python 2.7
    writer.writerow(dict(zip(self.fieldnames, self.fieldnames)))
    for ce in self.units:
        writer.writerow(ce.todict())
    return output.getvalue()

def serialize(self, out):
    # Check first if there is at least one translated unit
    translated_units = [u for u in self.units if u.istranslated()]
    if not translated_units:
        return
    output = csv.StringIO()
    writer = csv.DictWriter(output, fieldnames=OMEGAT_FIELDNAMES, dialect="omegat")
    for unit in translated_units:
        writer.writerow(unit.dict)
    out.write(output.getvalue().encode(self.encoding))

def serialize(self, out):
    # Check first if there is at least one translated unit
    translated_units = [u for u in self.units if u.istranslated()]
    if not translated_units:
        return
    output = csv.StringIO()
    writer = csv.DictWriter(output, fieldnames=self._fieldnames, dialect="utx")
    for unit in translated_units:
        writer.writerow(unit.dict)
    result = output.getvalue().encode(self.encoding)
    out.write(self._write_header().encode(self.encoding))
    out.write(result)

def __str__(self):
    output = csv.StringIO()
    writer = csv.DictWriter(output, fieldnames=self._fieldnames, dialect="utx")
    unit_count = 0
    for unit in self.units:
        if unit.istranslated():
            unit_count += 1
            writer.writerow(unit.dict)
    if unit_count == 0:
        return ""
    output.reset()
    return self._write_header() + "".join(output.readlines())

def _load_spec(self, data: str):
    buf = csv.StringIO(data)
    reader = csv.DictReader(buf)
    i = 0
    for line in reader:
        self.section_uncompressed_lengths[i] = int(line['uncompressed_length'])
        self.section_lengths[i] = int(line['section_length'])
        self.section_indices[i] = int(line['section_index'])
        self.section_compressed[i] = int(line['compressed'])
        self.section_offsets[i] = int(line['section_offset'])
        i += 1
    self.sections_count = i

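# Round-trip sketch (hypothetical): this _load_spec variant reads the
# header-bearing CSV emitted by the _dump_spec snippet earlier, so
# something like the following should restore the section tables:
#
#   spec._load_spec(spec._dump_spec().decode('ascii'))
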
def serialize(self, out):
    output = csv.StringIO()
    writer = csv.DictWriter(output, FIELDNAMES, dialect="catkeys")
    # Calculate/update fingerprint
    self.header.setchecksum(self._compute_fingerprint())
    # No real headers, the first line contains metadata
    writer.writerow(
        dict(zip(FIELDNAMES,
                 [self.header._header_dict[key] for key in FIELDNAMES_HEADER])))
    for unit in self.units:
        writer.writerow(unit.dict)
    out.write(output.getvalue().encode(self.encoding))

def serialize(self, out):
    # Check first if there is at least one translated unit
    translated_units = [u for u in self.units if u.istranslated()]
    if not translated_units:
        return
    output = csv.StringIO()
    writer = csv.DictWriter(output, fieldnames=WF_FIELDNAMES, dialect="wordfast")
    # No real headers, the first line contains metadata
    self.header.tucount = len(translated_units)
    writer.writerow(dict(zip(WF_FIELDNAMES,
                             [self.header.header[key]
                              for key in WF_FIELDNAMES_HEADER])))
    for unit in translated_units:
        writer.writerow(unit.dict)
    out.write(output.getvalue().encode(self.encoding))

def _get_run_extra(self):
    """Use this in child modules to append into the run arguments.

    Returns:
        list: Additional arguments to add into the docker run command.
    """
    self.ensure_one()
    if not self.repo_id.docker_run_extra_args:
        return []
    f_extra = csv.StringIO(self.repo_id.docker_run_extra_args)
    f_extra_csv = csv.reader(f_extra)
    try:
        extra_cmd = next(f_extra_csv)
    except StopIteration:
        extra_cmd = []
    return extra_cmd

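# A standalone sketch of the same csv-based splitting (outside Odoo),
# showing why a csv reader is used here: a quoted argument may contain
# commas without being split. io.StringIO stands in for csv.StringIO:
import csv
import io

def split_extra_args(raw):
    try:
        # The first (only) csv row is the argument list
        return next(csv.reader(io.StringIO(raw)))
    except StopIteration:
        # An empty string yields no rows at all
        return []

# split_extra_args('--privileged,-v,"/host,path:/mnt"')
# -> ['--privileged', '-v', '/host,path:/mnt']
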
def __str__(self):
    output = csv.StringIO()
    writer = csv.DictWriter(output, fieldnames=OMEGAT_FIELDNAMES, dialect="omegat")
    unit_count = 0
    for unit in self.units:
        if unit.istranslated():
            unit_count += 1
            writer.writerow(unit.dict)
    if unit_count == 0:
        return ""
    output.reset()
    decoded = "".join(output.readlines()).decode('utf-8')
    try:
        return decoded.encode(self._encoding)
    except UnicodeEncodeError:
        return decoded.encode('utf-8')

def getCsvReader(self, filename):
    # Helper function to grab a specific file (stops, routes, etc.)
    # Open as ZipExtFile
    bytesFile = self.gtfsZip.open(filename, 'r')
    # Read the ZipExtFile
    readFile = bytesFile.read()
    # Decode the ZipExtFile to CSV format using StringIO
    csvFile = csv.StringIO(readFile.decode())
    # Close the file
    bytesFile.close()
    # Return a dict reader for the CSV
    return csv.DictReader(csvFile)

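# Hypothetical usage, assuming self.gtfsZip wraps a standard GTFS feed
# (stops.txt with stop_id/stop_name columns is part of the GTFS spec):
#
#   for stop in feed.getCsvReader('stops.txt'):
#       print(stop['stop_id'], stop['stop_name'])
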
def csv2config(input, config=None):
    if not hasattr(input, 'read'):
        input = csv.StringIO(input)
    if not config:
        config = ConfigParser()
    reader = csv.DictReader(input)
    for row in reader:
        section = row[reader.fieldnames[0]]
        config.add_section(section)
        for name, value in row.items():
            if value and name != reader.fieldnames[0]:
                config.set(section, name, value)
    return config

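# A small csv2config sketch: the first column names the section and the
# remaining columns become its options (the column names are hypothetical):
#
#   config = csv2config('host,user,port\nweb1,admin,8080\n')
#   # produces the equivalent of:
#   #   [web1]
#   #   user = admin
#   #   port = 8080
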
def detect_header(sample, dialect, fieldnames):
    """Test if the file has a header; return the header row if it looks
    like valid field names, otherwise a default list of field names sized
    to the first row (at least three columns)."""
    inputfile = csv.StringIO(sample)
    try:
        reader = csv.reader(inputfile, dialect)
    except csv.Error:
        try:
            inputfile.seek(0)
            reader = csv.reader(inputfile, 'default')
        except csv.Error:
            inputfile.seek(0)
            reader = csv.reader(inputfile, 'excel')
    header = next(reader)
    columncount = max(len(header), 3)
    if valid_fieldnames(header):
        return header
    return fieldnames[:columncount]

def _load_spec(self, data: bytes):
    buf = csv.StringIO(data.decode('ascii'))
    reader = csv.DictReader(buf, fieldnames=[
        "uncompressed_length", "section_length", "section_index",
        "compressed", "section_offset"
    ])
    i = 0
    for line in reader:
        # Cast to int: csv yields strings, and these fields are used as
        # lengths/offsets (matching the _load_spec variant above).
        self.uncompressed_lengths[i] = int(line['uncompressed_length'])
        self.section_lengths[i] = int(line['section_length'])
        self.section_indices[i] = int(line['section_index'])
        self.section_compressed[i] = int(line['compressed'])
        self.section_offsets[i] = int(line['section_offset'])
        i += 1
    self.sections_count = i

def parse_store(cls, storefile):
    """Parses the store."""
    storeclass = cls.get_class()

    # Did we get file or filename?
    if not hasattr(storefile, 'read'):
        storefile = open(storefile, 'rb')

    # Read content for fixups
    content = storefile.read()
    storefile.seek(0)

    # Parse file
    store = storeclass.parsefile(storefile)

    # Did headers detection work?
    if store.fieldnames != ['location', 'source', 'target']:
        return store

    if not isinstance(content, six.string_types) and six.PY3:
        content = content.decode('utf-8')

    fileobj = csv.StringIO(content)
    storefile.close()

    # Try reading header
    reader = csv.reader(fileobj, store.dialect)
    header = next(reader)
    fileobj.close()

    # We seem to have a match
    if len(header) != 2:
        return store

    result = storeclass(fieldnames=['source', 'target'])
    if six.PY3:
        result.parse(content.encode('utf-8'))
    else:
        result.parse(content)
    return result

def parse(self, csvsrc, sample_length=1024):
    if self._encoding == "auto":
        text, encoding = self.detect_encoding(
            csvsrc, default_encodings=["utf-8", "utf-16"])
        # FIXME: raise parse error if encoding detection fails?
        self.encoding = encoding or "utf-8"
    else:
        text = csvsrc.decode(self.encoding)
    sniffer = csv.Sniffer()
    if sample_length:
        sample = text[:sample_length]
    else:
        sample = text
    try:
        self.dialect = sniffer.sniff(sample)
        if self.dialect.quoting == csv.QUOTE_MINIMAL:
            # HACKISH: most probably a default, not real detection
            self.dialect.quoting = csv.QUOTE_ALL
            self.dialect.doublequote = True
    except csv.Error:
        self.dialect = "default"
    inputfile = csv.StringIO(text)
    try:
        fieldnames = detect_header(inputfile, self.dialect, self.fieldnames)
        self.fieldnames = fieldnames
    except csv.Error:
        pass
    inputfile.seek(0)
    reader = try_dialects(inputfile, self.fieldnames, self.dialect)
    first_row = True
    for row in reader:
        newce = self.UnitClass()
        newce.fromdict(row)
        if not first_row or not newce.match_header():
            self.addunit(newce)
        first_row = False

def parse(self, csvsrc):
    text, encoding = self.detect_encoding(
        csvsrc, default_encodings=['utf-8', 'utf-16'])
    # FIXME: raise parse error if encoding detection fails?
    if encoding and encoding.lower() != 'utf-8':
        csvsrc = text.encode('utf-8').lstrip(codecs.BOM_UTF8)
    self.encoding = encoding or 'utf-8'
    sniffer = csv.Sniffer()
    # FIXME: maybe we should sniff a smaller sample
    sample = csvsrc[:1024]
    if isinstance(sample, six.text_type):
        sample = sample.encode('utf-8')
    try:
        self.dialect = sniffer.sniff(sample)
        if not self.dialect.escapechar:
            self.dialect.escapechar = '\\'
        if self.dialect.quoting == csv.QUOTE_MINIMAL:
            # HACKISH: most probably a default, not real detection
            self.dialect.quoting = csv.QUOTE_ALL
            self.dialect.doublequote = True
    except csv.Error:
        self.dialect = 'default'
    try:
        fieldnames = detect_header(sample, self.dialect, self.fieldnames)
        self.fieldnames = fieldnames
    except csv.Error:
        pass
    inputfile = csv.StringIO(csvsrc)
    reader = try_dialects(inputfile, self.fieldnames, self.dialect)
    # reader = SimpleDictReader(csvfile, fieldnames=fieldnames, dialect=dialect)
    first_row = True
    for row in reader:
        newce = self.UnitClass()
        newce.fromdict(row)
        if not first_row or not newce.match_header():
            self.addunit(newce)
        first_row = False

def parse(self, csvsrc):
    text, encoding = self.detect_encoding(
        csvsrc, default_encodings=['utf-8', 'utf-16'])
    # FIXME: raise parse error if encoding detection fails?
    self.encoding = encoding or 'utf-8'
    sniffer = csv.Sniffer()
    # sniff and detect_header want bytes on Python 2 but text on Python 3
    if six.PY2:
        sample = csvsrc[:1024]
    else:
        sample = text[:1024]
    try:
        self.dialect = sniffer.sniff(sample)
        if not self.dialect.escapechar:
            self.dialect.escapechar = '\\'
        if self.dialect.quoting == csv.QUOTE_MINIMAL:
            # HACKISH: most probably a default, not real detection
            self.dialect.quoting = csv.QUOTE_ALL
            self.dialect.doublequote = True
    except csv.Error:
        self.dialect = 'default'
    try:
        fieldnames = detect_header(sample, self.dialect, self.fieldnames)
        self.fieldnames = fieldnames
    except csv.Error:
        pass
    inputfile = csv.StringIO(csvsrc if six.PY2 else text)
    reader = try_dialects(inputfile, self.fieldnames, self.dialect)
    # reader = SimpleDictReader(csvfile, fieldnames=fieldnames, dialect=dialect)
    first_row = True
    for row in reader:
        newce = self.UnitClass()
        newce.fromdict(row)
        if not first_row or not newce.match_header():
            self.addunit(newce)
        first_row = False

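# For reference, the dialect sniffing used by the three parse variants
# above, reduced to a standalone form (the sample text is hypothetical):
import csv

sample = 'source;target\r\n"hello";"world"\r\n'
dialect = csv.Sniffer().sniff(sample)
# The quote-adjacent delimiter is detected: dialect.delimiter == ';'
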
def pack_as_attach(self):
    self.ensure_one()
    Attach = self.env['ir.attachment']

    # Step 1: generate the csv from the survey inputs
    csv_f = csv.StringIO()
    csv_w = csv.DictWriter(csv_f, ['title', 'value'])
    lines = [
        to_csv_line(l) for l in self.user_input_line_ids
        if l.question_id.type != 'attach' and not l.skipped
    ]
    for t, v in lines:
        csv_w.writerow({'title': t, 'value': v})
    att = {
        'name': "data.csv",
        'datas_fname': "data.csv",
        'res_model': 'survey.user_input',
        'datas': base64.b64encode(csv_f.getvalue().encode('utf-8')),
        # 'res_field': question.id,
        'res_id': self.id,
    }
    csv_att = Attach.create(att)

    # Step 2: bundle all attachments of this input into a zip
    zip_filename = "doc.zip"
    bIO = io.BytesIO()
    zip_file = zipfile.ZipFile(bIO, "w", zipfile.ZIP_DEFLATED)
    for a in Attach.search([('res_id', '=', self.id),
                            ('res_model', '=', 'survey.user_input')]):
        zip_file.writestr(a.datas_fname, base64.b64decode(a.datas))
    zip_file.close()
    att.update({
        'name': 'attachments',
        'datas_fname': zip_filename,
        'datas': base64.b64encode(bIO.getvalue())
    })
    Attach.create(att)
    return True

def getLabels(zfile):
    print("Extracting labels\n")
    filenames, md_labels, sc_labels = [], [], []
    zip_infolist = zfile.infolist()[1:]
    for entry in zip_infolist:
        if '.csv' in entry.filename:
            with zfile.open(entry) as file:
                cf = file.read()
                c = csv.StringIO(cf.decode())
                next(c)  # Skip the first line, which is the csv header
                for row in c:
                    # Rows are ';'-separated; the last two fields are the
                    # sc and md labels, everything before is the filename.
                    md_label_strt_ind = row.rfind(';')
                    md_label_end_ind = row.rfind("\r")
                    md_labels.append(row[md_label_strt_ind + 1:md_label_end_ind])
                    sc_labels_strt_ind = row[:md_label_strt_ind].rfind(';')
                    sc_labels.append(row[sc_labels_strt_ind + 1:md_label_strt_ind])
                    filename_ind = row[:sc_labels_strt_ind].rfind(';')
                    if filename_ind > -1:
                        f_name = row[filename_ind + 1:sc_labels_strt_ind]
                    else:
                        f_name = row[:sc_labels_strt_ind]
                    if isTest == 1 and f_name == 'IRHT_P_009783.tif':
                        print('No file named ' + f_name +
                              ". This filename will not be added!")
                    else:
                        filenames.append(f_name)
            zfile.infolist().remove(entry)  # remove the csv file from infolist
        if '.db' in entry.filename:
            # remove the db file from infolist
            zfile.infolist().remove(entry)
    return filenames, sc_labels, md_labels

def to_argv(string_):
    c = csv.reader(csv.StringIO(string_), delimiter=" ")
    return list(filter(_is_empty_string, list(c)[0]))

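# Example: to_argv('run "my file.txt" --fast') tokenizes with the quoted
# chunk intact, i.e. ['run', 'my file.txt', '--fast']. This assumes the
# externally defined _is_empty_string, despite its name, returns True for
# tokens that should be kept (i.e. acts as a non-empty filter).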