Beispiel #1
0
    def parse(self, csvsrc):
        """Populate the store with units read from CSV source ``csvsrc``.

        The source is decoded via ``self.detect_encoding`` (falling back to
        'utf-8' as the recorded encoding), the CSV dialect is sniffed from
        the first 1024 characters of the text, the column names are
        determined with ``detect_header`` and every row is turned into a
        ``self.UnitClass`` instance that is added with ``self.addunit``.
        """
        text, encoding = self.detect_encoding(
            csvsrc, default_encodings=['utf-8', 'utf-16'])
        #FIXME: raise parse error if encoding detection fails?
        self.encoding = encoding if encoding else 'utf-8'

        sniffer = csv.Sniffer()
        sample = text[:1024]
        try:
            self.dialect = sniffer.sniff(sample)
        except csv.Error:
            # Sniffing failed: fall back to the dialect named 'default'.
            self.dialect = 'default'
        else:
            if self.dialect.quoting == csv.QUOTE_MINIMAL:
                #HACKISH: most probably a default, not real detection
                self.dialect.quoting = csv.QUOTE_ALL
                self.dialect.doublequote = True

        inputfile = csv.StringIO(text)
        try:
            self.fieldnames = detect_header(inputfile, self.dialect,
                                            self.fieldnames)
        except csv.Error:
            # Keep the field names that were already configured.
            pass

        # Header detection may have consumed input; start over for the rows.
        inputfile.seek(0)
        rows = try_dialects(inputfile, self.fieldnames, self.dialect)

        for index, row in enumerate(rows):
            unit = self.UnitClass()
            unit.fromdict(row)
            # Only the very first row is checked with match_header(); it is
            # left out of the store when that check succeeds.
            if index == 0 and unit.match_header():
                continue
            self.addunit(unit)
Beispiel #2
0
 def getoutput(self):
     """Return the store rendered as CSV text.

     A header row built from ``self.fieldnames`` is written first, followed
     by one row per unit (taken from ``unit.todict()``) using
     ``self.dialect``. Keys that are not listed in ``self.fieldnames`` are
     silently dropped.
     """
     buffer = csv.StringIO()
     writer = csv.DictWriter(
         buffer,
         self.fieldnames,
         extrasaction='ignore',
         dialect=self.dialect,
     )
     writer.writeheader()
     writer.writerows(unit.todict() for unit in self.units)
     return buffer.getvalue()
Beispiel #3
0
 def serialize(self, out):
     """Write the whole store to ``out`` as encoded "catkeys" CSV.

     The first row holds the header metadata (the values of
     ``self.header._header_dict`` for each key in ``FIELDNAMES_HEADER``,
     paired positionally with ``FIELDNAMES``); every unit follows as one
     row. The text is encoded with ``self.encoding`` before being written.
     """
     buffer = csv.StringIO()
     writer = csv.DictWriter(buffer, FIELDNAMES, dialect="catkeys")

     # No real headers, the first line contains metadata
     metadata = [self.header._header_dict[name] for name in FIELDNAMES_HEADER]
     writer.writerow(dict(zip(FIELDNAMES, metadata)))

     writer.writerows(unit.dict for unit in self.units)
     out.write(buffer.getvalue().encode(self.encoding))
Beispiel #4
0
    def serialize(self, out):
        """Write the translated units to ``out`` as encoded "omegat" CSV.

        Nothing at all is written when no unit reports ``istranslated()``.
        Otherwise each translated unit's ``dict`` becomes one row with the
        columns ``OMEGAT_FIELDNAMES``; the result is encoded with
        ``self.encoding``.
        """
        translated = [unit for unit in self.units if unit.istranslated()]
        # Only produce output if there is at least one translated unit
        if translated:
            buffer = csv.StringIO()
            writer = csv.DictWriter(
                buffer,
                fieldnames=OMEGAT_FIELDNAMES,
                dialect="omegat",
            )
            writer.writerows(unit.dict for unit in translated)
            out.write(buffer.getvalue().encode(self.encoding))
Beispiel #5
0
    def serialize(self, out):
        """Write the header and the translated units to ``out`` as "utx" CSV.

        Nothing at all is written when no unit reports ``istranslated()``.
        Otherwise the rows are rendered first, then the text returned by
        ``self._write_header()`` is written, followed by the rows; both
        parts are encoded with ``self.encoding``.
        """
        # Check first if there is at least one translated unit
        translated = [unit for unit in self.units if unit.istranslated()]
        if not translated:
            return

        buffer = csv.StringIO()
        csv.DictWriter(
            buffer,
            fieldnames=self._fieldnames,
            dialect="utx",
        ).writerows(unit.dict for unit in translated)

        # The rows are rendered before the header text is requested.
        body = buffer.getvalue().encode(self.encoding)
        header = self._write_header().encode(self.encoding)
        out.write(header)
        out.write(body)
Beispiel #6
0
    def serialize(self, out):
        """Write the translated units to ``out`` as encoded "wordfast" CSV.

        Nothing at all is written when no unit reports ``istranslated()``.
        Otherwise ``self.header.tucount`` is set to the number of translated
        units, a metadata row built from ``self.header.header`` is written
        and each translated unit follows as one row. The text is encoded
        with ``self.encoding``.
        """
        # Check first if there is at least one translated unit
        translated = [unit for unit in self.units if unit.istranslated()]
        if not translated:
            return

        buffer = csv.StringIO()
        writer = csv.DictWriter(
            buffer,
            fieldnames=WF_FIELDNAMES,
            dialect="wordfast",
        )

        # No real headers, the first line contains metadata
        self.header.tucount = len(translated)
        metadata = [self.header.header[name] for name in WF_FIELDNAMES_HEADER]
        writer.writerow(dict(zip(WF_FIELDNAMES, metadata)))

        writer.writerows(unit.dict for unit in translated)
        out.write(buffer.getvalue().encode(self.encoding))
Beispiel #7
0
def detect_header(sample, dialect, fieldnames):
    """Return the column names to use for the CSV data in ``sample``.

    The first row is read with ``dialect``. If the csv module rejects that
    dialect, the 'default' dialect and finally the built-in 'excel' dialect
    are tried instead.

    :param sample: CSV text, or a file-like object (anything providing a
        ``read`` method) positioned at the start of the CSV data.
    :param dialect: dialect name or dialect object passed to ``csv.reader``.
    :param fieldnames: fallback column names used when the first row is not
        a valid header.
    :return: the first row if ``valid_fieldnames`` accepts it; otherwise
        ``fieldnames`` cut down to the width of the first row, but never to
        fewer than three columns. When the input contains no rows at all,
        the first three fallback names are returned.
    """
    # Callers pass either the raw text or an already opened buffer.
    if hasattr(sample, 'read'):
        inputfile = sample
    else:
        inputfile = csv.StringIO(sample)

    # Creating a reader consumes no input, so no rewinding is needed between
    # the attempts below.
    try:
        reader = csv.reader(inputfile, dialect)
    except csv.Error:
        try:
            reader = csv.reader(inputfile, 'default')
        except csv.Error:
            reader = csv.reader(inputfile, 'excel')

    # Input without any row must not leak a bare StopIteration to the caller.
    header = next(reader, None)
    if header is None:
        return fieldnames[:3]

    columncount = max(len(header), 3)
    if valid_fieldnames(header):
        return header
    return fieldnames[:columncount]