Exemple #1
0
 def rollover(self):
     """Move the buffered contents into a temporary file.

     Does nothing when ``self._rolled`` is already truthy.  Otherwise the
     current buffer's contents are copied into a UTF-8 ``EncodedFile``
     wrapped around a ``TemporaryFile`` created in ``self._dir``, the
     cursor position is carried over, the old buffer is closed and the
     new file is stored as ``self._buffer``.
     """
     if self._rolled:
         return
     backing_file = TemporaryFile(dir=self._dir)
     spilled = EncodedFile(backing_file, data_encoding='utf-8')
     in_memory = self.buffer
     cursor = in_memory.tell()
     spilled.write(in_memory.getvalue())
     # Restore the cursor so callers keep reading/writing where they were.
     spilled.seek(cursor)
     in_memory.close()
     self._buffer = spilled
Exemple #2
0
    def __init__(self, zodb_blob, encoding=None):
        """Parse an INI-style configuration stored in a ZODB blob.

        :param zodb_blob: object whose ``open('r')`` returns a readable,
            seekable file-like object.
        :param encoding: encoding of the stored data; when falsy it is
            detected with ``snoop_encoding``.

        The parsed configuration is kept in ``self.parser`` and the
        encoding that was used in ``self.encoding``.  The blob handle is
        closed even when detection or parsing raises.
        """
        self.zodb_blob = zodb_blob
        blob = zodb_blob.open('r')
        try:
            if not encoding:
                encoding = snoop_encoding(blob)

            # Rewind: encoding detection may have consumed part of the blob.
            blob.seek(0)
            if encoding != "utf-8":
                # Present the stored data to the parser as UTF-8.
                blob = EncodedFile(blob, "utf-8", encoding)

            self.parser = configparser.ConfigParser()
            self.parser.readfp(blob)
            self.encoding = encoding
        finally:
            # Closing the EncodedFile wrapper also closes the wrapped blob.
            blob.close()
Exemple #3
0
 def convert_to_tags(self):
     """
     Read in the file one line at a time. Get the important info, between
     [:16]. Check if this info matches a dictionary entry. If it does, call
     the appropriate function.
     The functions that are called:
         a text function for text
         an open function for open tags
         an open with attribute function for tags with attributes
         an empty with attribute function for tags that are empty but have
         attributes.
         a closed function for closed tags.
         an empty tag function.
     Afterwards, if a conversion was requested or the encoding was flagged
     as bad, the output is re-encoded (to UTF-8, or to US-ASCII for a bad
     encoding) before it replaces the input file.
     """
     self.__initiate_values()
     # The output handle is bound to an instance attribute -- presumably so
     # the state handlers invoked below can write to it (TODO confirm).
     with open(self.__write_to, 'w') as self.__write_obj:
         self.__write_dec()
         with open(self.__file, 'r') as read_obj:
             for line in read_obj:
                 # The first 16 characters of a line are the dispatch key.
                 self.__token_info = line[:16]
                 action = self.__state_dict.get(self.__token_info)
                 # Lines whose key is not in the state dict are ignored.
                 if action is not None:
                     action(line)
     # convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
     if self.__convert_utf or self.__bad_encoding:
         copy_obj = copy.Copy(bug_handler=self.__bug_handler)
         # Make the freshly written output the input of the re-encoding pass.
         copy_obj.rename(self.__write_to, self.__file)
         file_encoding = "utf-8"
         if self.__bad_encoding:
             file_encoding = "us-ascii"
         with open(self.__file, 'r') as read_obj:
             with open(self.__write_to, 'w') as write_obj:
                 # Data written is interpreted as self.__encoding and stored
                 # as file_encoding; unencodable characters are replaced.
                 write_objenc = EncodedFile(write_obj, self.__encoding,
                                 file_encoding, 'replace')
                 for line in read_obj:
                     write_objenc.write(line)
     copy_obj = copy.Copy(bug_handler=self.__bug_handler)
     if self.__copy:
         # Keep a copy of this stage's output under a fixed name.
         copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
     # NOTE(review): the temp file is removed after rename(), so rename()
     # presumably copies rather than moves -- confirm against copy.Copy.
     copy_obj.rename(self.__write_to, self.__file)
     os.remove(self.__write_to)
Exemple #4
0
def load_snippets_from_txt_file(txt_file, snippet_count, book_id):
    """Load snippet_count snippets from the given text file.

    Random offsets between 10% and 90% of the file size are picked; after
    skipping the (possibly partial) line at that offset, the first of the
    next two lines that is at least ``MIN_SNIPPET_SIZE`` long becomes a
    snippet.

    :param txt_file: object exposing ``name`` (a path) and ``file`` (the
        underlying binary file object).
    :param snippet_count: number of distinct snippets to collect.
    :param book_id: identifier stored alongside every snippet.
    :returns: a set of ``(stripped_line, byte_position, book_id)`` tuples.

    NOTE(review): the loop only ends once ``snippet_count`` distinct
    snippets were found, so it may not terminate on files with too few
    qualifying lines.
    """
    size = os.path.getsize(txt_file.name)
    # Floor division keeps the bounds integral; random.randint rejects
    # non-integral bounds when ``/`` is true division.
    lower_bound = size // 10
    upper_bound = 9 * size // 10

    snippets = set()
    enc_file = EncodedFile(txt_file.file, 'utf-8', errors='ignore')
    while len(snippets) < snippet_count:
        starting_byte = random.randint(lower_bound, upper_bound)
        # Ignore the first line read since the cursor may start in the middle.
        enc_file.seek(starting_byte)
        guarded_readline(enc_file)

        pos = enc_file.tell()
        for _ in range(2):
            line = guarded_readline(enc_file)
            if len(line) >= MIN_SNIPPET_SIZE:
                line = unicode(line, encoding='utf-8', errors='ignore')
                if VERBOSE:
                    print("{0} : {1}".format(txt_file.name, pos))
                snippets.add((line.strip(), pos, book_id))
                break
            pos = enc_file.tell()

    return snippets
Exemple #5
0
 def test_decode_error_dictreader(self):
     """A DictReader must honour the requested error-handling mode."""
     raw = StringIO('name,height,weight\nLöwis,2,3')
     source = EncodedFile(raw, data_encoding='iso-8859-1')
     rows = list(csv.DictReader(source, encoding='ascii', errors='ignore'))
     # With errors='ignore' the undecodable character is dropped.
     self.assertEqual(rows[0]['name'], 'Lwis')
Exemple #6
0
 def test_decode_error(self):
     """A plain reader must honour the requested error-handling mode."""
     raw = StringIO('Löwis,2,3')
     source = EncodedFile(raw, data_encoding='iso-8859-1')
     rows = list(csv.reader(source, encoding='ascii', errors='ignore'))
     # With errors='ignore' the undecodable character is dropped.
     self.assertEqual(rows[0][0], 'Lwis')
Exemple #7
0
 def buffer(self):
     """Return the backing buffer, creating it on first access.

     The buffer is a UTF-8 ``EncodedFile`` around an in-memory ``BytesIO``
     and is cached on the instance as ``_buffer``.
     """
     try:
         current = self._buffer
     except AttributeError:
         # First access: nothing cached yet, so build and remember it.
         current = self._buffer = EncodedFile(BytesIO(),
                                              data_encoding='utf-8')
     return current
Exemple #8
0
def _encode_wrap(f):
    """Wrap file object *f* in an ``EncodedFile`` using UTF-8."""
    wrapped = EncodedFile(f, data_encoding='utf-8')
    return wrapped
Exemple #9
0
    def form_valid(self, form):
        """Import the clippings contained in an uploaded clippings file.

        The upload is read as UTF-8 (undecodable bytes are ignored), parsed
        with ``kindle_clipping_parser`` and stored as a ``MyClippingsFile``.
        Books and clippings that do not exist yet for the current user are
        then created.  The outcome is reported through the Django messages
        framework; the method always finishes by delegating to the parent
        ``form_valid``.

        :param form: the validated form, passed through to the parent.
        :returns: whatever the parent ``form_valid`` returns.
        """
        if 'clippings_file' not in self.request.FILES:
            messages.add_message(self.request, messages.ERROR,
                                 _('Could not process the uploaded file'))
            return super(UploadMyClippingsFileView, self).form_valid(form)

        try:
            clippings_file = EncodedFile(self.request.FILES['clippings_file'],
                                         data_encoding='utf-8',
                                         errors='ignore')
            clippings_file_content = clippings_file.read()
            clips = kindle_clipping_parser.get_clips_from_text(
                clippings_file_content)

            # Save the file in db
            language_header = self.request.META.get('HTTP_ACCEPT_LANGUAGE')
            MyClippingsFile.objects.create_file(
                content=clippings_file_content,
                language_header=language_header)
        except Exception:
            # Deliberate catch-all at the request boundary: log the traceback
            # and tell the user instead of answering with a server error.
            logger.error('Error parsing a clippings file.', exc_info=True)
            messages.add_message(
                self.request, messages.ERROR,
                _('Couldn\'t process your Clippings. No clippings have been imported. The developer is informed, please try again in a couple of days!'
                  ))
        else:
            user = self.request.user
            num_books = 0
            num_clippings = 0
            errors = 0
            for book, clippings in clips.items():
                book, created = Book.objects.get_or_create(
                    user=user,
                    title=book,
                )
                if created:
                    num_books += 1
                try:
                    for clip_content in clippings:
                        __, created = Clipping.objects.get_or_create(
                            user=user,
                            content=clip_content,
                            defaults={
                                'book': book,
                            })
                        if created:
                            num_clippings += 1
                except Exception:
                    # Best effort: one failure abandons the rest of this
                    # book's clippings and is counted once.
                    errors += 1
                    logger.error('Error importing a clipping.', exc_info=True)

            if errors > 0:
                # Translate the template first and interpolate afterwards;
                # formatting before the lookup means the message id is never
                # found in the translation catalog.
                messages.add_message(
                    self.request, messages.ERROR,
                    _('{num_clippings} clippings could not be imported'
                      ).format(num_clippings=errors))

            messages.add_message(
                self.request, messages.SUCCESS,
                _('Successfully imported {num_clippings} new clippings from {num_books} books'
                  ).format(
                      num_clippings=num_clippings,
                      num_books=num_books,
                  ))

        return super(UploadMyClippingsFileView, self).form_valid(form)
Exemple #10
0
 def __init__(self, out = None):
     """Store the output stream; a falsy *out* selects UTF-8 wrapped stdout."""
     self.out = out if out else EncodedFile(sys.stdout, "utf-8")
Exemple #11
0
 def select(self):
     """Return all data rows of the CSV file at ``self.fpath``.

     The file is read as UTF-8 with undecodable bytes ignored and its
     first line (the header) is skipped.

     :returns: list of rows, each a list of column values.
     :raises StopIteration: if the file is completely empty.
     """
     raw_file = open(self.fpath, 'rb')
     try:
         src_file = EncodedFile(raw_file, 'utf-8', 'utf-8', 'ignore')
         # Skip the header line; the builtin works with both the Python 2
         # ``next`` method and the Python 3 ``__next__`` protocol.
         next(src_file)
         return list(csv.reader(src_file))
     finally:
         # The original never closed the handle.
         raw_file.close()
Exemple #12
0
                                          file, line)
    else:
        s = warnings.formatwarning(message, category, filename, lineno, line)
        logger = logging.getLogger("py.warnings")
        if not logger.handlers:
            if hasattr(sys.stderr, "isatty") and sys.stderr.isatty():
                handler = logging.StreamHandler()  # Logs to stderr by default
            else:
                handler = logging.NullHandler()
            logger.addHandler(handler)
        log(s.strip(), fn=logger.warning)


# Replace the stdlib warning hook with the showwarning function defined above.
warnings.showwarning = showwarning

# Module-level in-memory log buffer; data passing through it is treated as
# UTF-8 and malformed input is replaced instead of raising.
logbuffer = EncodedFile(StringIO(), "UTF-8", errors="replace")


def wx_log(logwindow, msg):
    """Forward *msg* to *logwindow* if it is visible and the buffer is in use.

    Nothing is logged when the window is not shown on screen, or when the
    module-level ``logbuffer`` is at position 0.
    """
    # A buffer position of 0 means the log buffer has been emptied, in which
    # case our log message is already included.
    if logwindow.IsShownOnScreen() and logbuffer.tell():
        logwindow.Log(msg)


class DummyLogger():
    def critical(self, msg, *args, **kwargs):
        pass

    def debug(self, msg, *args, **kwargs):
def getAstrolog32(filename):
	"""
	Parse an Astrolog / Astrolog32 chart info file into the dict ``d``.

	The file is read as latin-1 (re-coded to UTF-8). Only lines starting
	with "/qb" (date, time, timezone, coordinates) and "/zi" (name and
	location) are interpreted.

	examples:
@0102  ; Astrolog chart info.
/qb 6 23 1972  3:00:00 ST -1:00   5:24:00E 43:18:00N
/zi "Zinedine Zidane" "Marseille"
	
@0102  ; Astrolog32 chart info.

; Date is in American format: month day year.

/qb 10 27 1980 10:20:00 ST -1:00  14:39'00E 50:11'00N
/zi "Honzik" "Brandys nad Labem"
	
	NOTE(review): no return statement is visible in this excerpt; the
	function is presumably meant to return ``d`` -- confirm.
	"""
	d={}
	# NOTE(review): the file handle is never closed in the visible code.
	h=open(filename)
	f=EncodedFile(h,"utf-8","latin-1")
	for line in f.readlines():
		if line[0:3] == "/qb":
			# Split on single spaces and drop the empty strings produced by
			# runs of spaces, leaving:
			# /qb month day year time <field 5, unused> timezone longitude latitude
			s0=line.strip().split(' ')
			s=[]
			for j in range(len(s0)):
				if s0[j]!='':
					s.append(s0[j])
			# Date parts are stored as the strings found in the file.
			d['month']=s[1]
			d['day']=s[2]
			d['year']=s[3]
			# Time parts default to 0 and are overwritten by whichever of
			# hour:minute:second are present.
			d['hour'],d['minute'],d['second']=0,0,0
			for x in range(len(s[4].split(':'))):
				if x == 0:
					d['hour'] = s[4].split(':')[0]
				if x == 1:
					d['minute'] = s[4].split(':')[1]
				if x == 2:
					d['second'] = s[4].split(':')[2]

			#timezone
			# "H:MM" -> hours + minutes/60; the sign of the sum is flipped
			# when the hour part is negative.
			# NOTE(review): "-1:30" yields 0.5 rather than 1.5 -- confirm intent.
			tz=s[6].split(':')
			d['timezone']=float(tz[0])+float(tz[1])/60.0
			if float(tz[0]) < 0:
				d['timezone']=d['timezone']/-1.0
			#longitude
			# The hemisphere letter is the last character of the field: append
			# it as its own element, then cut the element it came from down to
			# its first two characters (the digits).
			lon=s[7].split(':')
			lon.append(lon[-1][-1])
			lon[-2]=lon[-2][0:2]
			d['longitude']=float(lon[0])+(float(lon[1])/60.0)
			# More than three elements means a seconds part was present.
			if len(lon) > 3:
				d['longitude']+=float(lon[2])/3600.0
			# Western longitudes are stored as negative values.
			if lon[-1] == 'W':
				d['longitude'] = d['longitude']/-1.0
			#latitude
			# Same scheme as longitude; the variable name ``lon`` is reused.
			lon=s[8].split(':')
			lon.append(lon[-1][-1])
			lon[-2]=lon[-2][0:2]
			d['latitude']=float(lon[0])+(float(lon[1])/60.0)
			if len(lon) > 3:
				d['latitude']+=float(lon[2])/3600.0
			# Southern latitudes are stored as negative values.
			if lon[-1] == 'S':
				d['latitude'] = d['latitude']/-1.0

		if line[0:3] == "/zi":
			# Split on double quotes and drop empty / single-space pieces,
			# leaving: '/zi ', name, location.
			s0=line.strip().split('"')
			s=[]
			for j in range(len(s0)):
				if s0[j] != '' and s0[j] != ' ':
					s.append(s0[j])
			d['name']=s[1]
			d['location']=s[2]
Exemple #14
0
 def __init__(self, out=None):
     """Remember where output goes, defaulting to UTF-8 wrapped stdout."""
     if out:
         self.out = out
         return
     self.out = EncodedFile(sys.stdout, "utf-8")
Exemple #15
0
class DocGenerator(xmlapp.Application):
    """Application whose handlers write markup text for each event to ``self.out``."""

    def __init__(self, out=None):
        """Use *out* for output; a falsy value selects UTF-8 wrapped stdout."""
        self.out = out or EncodedFile(sys.stdout, "utf-8")

    def handle_pi(self, target, remainder):
        """Write a processing instruction."""
        instruction = "<?%s %s?>" % (target, remainder)
        self.out.write(instruction)

    def handle_start_tag(self, name, amap):
        """Write a start tag followed by its attributes from *amap*."""
        emit = self.out.write
        emit("<" + name)
        for attr_name, attr_value in amap.items():
            emit(' %s="%s"' % (attr_name, escape_attval(attr_value)))
        emit(">")

    def handle_end_tag(self, name):
        """Write an end tag."""
        closing = "</%s>" % name
        self.out.write(closing)

    def handle_ignorable_data(self, data, start_ix, end_ix):
        """Write the escaped slice ``data[start_ix:end_ix]``."""
        chunk = data[start_ix:end_ix]
        self.out.write(escape_content(chunk))

    def handle_data(self, data, start_ix, end_ix):
        """Write the escaped slice ``data[start_ix:end_ix]``."""
        chunk = data[start_ix:end_ix]
        self.out.write(escape_content(chunk))
Exemple #16
0
 def flush(self):
     """Write the parser's current state back into the ZODB blob.

     The blob is opened for writing; unless ``self.encoding`` is UTF-8 it
     is wrapped so that the UTF-8 data written by the parser is stored in
     ``self.encoding``.  The handle is closed even if writing fails.
     """
     blob = self.zodb_blob.open('w')
     try:
         if self.encoding != "utf-8":
             blob = EncodedFile(blob, "utf-8", self.encoding)
         self.parser.write(blob)
     finally:
         # Closing the EncodedFile wrapper also closes the wrapped blob.
         blob.close()
Exemple #17
0
 def select(self):
     """Read the CSV file at ``self.fpath`` and return its rows without the header.

     Content is treated as UTF-8; bytes that cannot be decoded are ignored.

     :returns: list of rows, each a list of column values.
     :raises StopIteration: if the file has no lines at all.
     """
     with open(self.fpath, 'rb') as raw_file:
         src_file = EncodedFile(raw_file, 'utf-8', 'utf-8', 'ignore')
         # Drop the header line.  The builtin next() is used because the
         # ``.next()`` method only exists on Python 2 iterators.
         next(src_file)
         reader = csv.reader(src_file)
         # Materialise the rows before the ``with`` block closes the file,
         # which the original never did.
         return list(reader)
import os
import csv
import datetime
from codecs import EncodedFile

from Tkinter import Tk
from tkFileDialog import askopenfilename

# Script: ask for a CSV file and rewrite it as 'importThis.csv' with a fixed
# header row. The visible loop converts per-row second counts for HH:MM output.
# NOTE(review): this excerpt ends inside the while loop below.
print("Select file..")

Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
filename = askopenfilename() # show an "Open" dialog box and return the path to the selected file
print("Executing..")


# Both files are stored as ISO-8859-1 on disk while the program exchanges
# UTF-8 data with them.
with EncodedFile(open(filename, 'rb'),'utf-8','iso8859-1') as input,EncodedFile(open('importThis.csv', 'wb'),'utf-8','iso8859-1') as output:
    # Set up the CSV reader for the source and the writer for the result.
    reader = csv.reader(input, delimiter=',', quotechar='"')
    writer = csv.writer(output, delimiter=',', quoting=csv.QUOTE_ALL, quotechar='"')

    # Discard the source file's own header row.
    reader.next()
    Header=["date","departure_airport","departure_time","arrival_airport","arrival_time","aircraft_type","aircraft_registration","pic_name","total_time","night","single_engine_vfr","single_engine_ifr","multi_engine_vfr","multi_engine_ifr","pic","co_pilot","multi_pilot","instructor","dual","simulator","ldgs_day","ldgs_night","remarks"]
    # Write the replacement header row.
    writer.writerow(Header)
    for row in reader:
        rad=8
        # The original comment says columns 8-18 hold times in seconds that
        # need to be HH:MM ("in FL" presumably names the source application).
        # NOTE(review): the loop condition below actually covers 8-19 -- confirm.
        while rad<20:
            tid = ''
            if(row[rad] != ''):
                # timedelta's string form is H:MM:SS.
                tid = str(datetime.timedelta(seconds=int(row[int(rad)])))
Exemple #19
0
 def columns(self):
     """Describe the columns named in the header of the CSV at ``self.fpath``.

     The file is read as UTF-8 with undecodable bytes ignored.

     :returns: one ``{'name': <header cell>, 'datatype': Text}`` dict per
         header cell, in file order.
     :raises StopIteration: if the file is empty.
     """
     with open(self.fpath, 'rb') as raw_file:
         f = EncodedFile(raw_file, 'utf-8', 'utf-8', 'ignore')
         # Only the first row is needed; the ``with`` block closes the
         # handle afterwards, which the original never did.
         header = next(csv.reader(f))
     return [{'name': x, 'datatype': Text} for x in header]
def getAstrolog32(filename):
	"""
	Read an Astrolog / Astrolog32 chart info file and collect its data in ``d``.

	The file is opened as latin-1 and re-coded to UTF-8. "/qb" lines supply
	date, time, timezone and coordinates; "/zi" lines supply name and
	location. All other lines are skipped.

	examples:
@0102  ; Astrolog chart info.
/qb 6 23 1972  3:00:00 ST -1:00   5:24:00E 43:18:00N
/zi "Zinedine Zidane" "Marseille"
	
@0102  ; Astrolog32 chart info.

; Date is in American format: month day year.

/qb 10 27 1980 10:20:00 ST -1:00  14:39'00E 50:11'00N
/zi "Honzik" "Brandys nad Labem"
	
	NOTE(review): this excerpt shows no return statement; ``d`` is
	presumably returned by code not visible here -- confirm.
	"""
	d={}
	# NOTE(review): nothing visible closes this handle.
	h=open(filename)
	f=EncodedFile(h,"utf-8","latin-1")
	for line in f.readlines():
		if line[0:3] == "/qb":
			# Tokenise on spaces, discarding empty tokens from repeated spaces.
			# Token layout: 0 "/qb", 1 month, 2 day, 3 year, 4 time,
			# 5 (unused here), 6 timezone, 7 longitude, 8 latitude.
			s0=line.strip().split(' ')
			s=[]
			for j in range(len(s0)):
				if s0[j]!='':
					s.append(s0[j])
			# Kept as strings exactly as read.
			d['month']=s[1]
			d['day']=s[2]
			d['year']=s[3]
			# Start from 0 and fill in as many of hour/minute/second as the
			# time token provides.
			d['hour'],d['minute'],d['second']=0,0,0
			for x in range(len(s[4].split(':'))):
				if x == 0:
					d['hour'] = s[4].split(':')[0]
				if x == 1:
					d['minute'] = s[4].split(':')[1]
				if x == 2:
					d['second'] = s[4].split(':')[2]

			#timezone
			# Decimal hours from "H:MM"; a negative hour part flips the sign
			# of the computed sum.
			# NOTE(review): minutes are added before the flip, so "-1:30"
			# becomes 0.5 -- confirm this is intended.
			tz=s[6].split(':')
			d['timezone']=float(tz[0])+float(tz[1])/60.0
			if float(tz[0]) < 0:
				d['timezone']=d['timezone']/-1.0
			#longitude
			# Move the trailing hemisphere letter into its own list element
			# and keep only the two leading characters of the element that
			# carried it.
			lon=s[7].split(':')
			lon.append(lon[-1][-1])
			lon[-2]=lon[-2][0:2]
			d['longitude']=float(lon[0])+(float(lon[1])/60.0)
			# A fourth element exists only when seconds were given.
			if len(lon) > 3:
				d['longitude']+=float(lon[2])/3600.0
			# West is negative.
			if lon[-1] == 'W':
				d['longitude'] = d['longitude']/-1.0
			#latitude
			# Identical handling; ``lon`` is reused for the latitude parts.
			lon=s[8].split(':')
			lon.append(lon[-1][-1])
			lon[-2]=lon[-2][0:2]
			d['latitude']=float(lon[0])+(float(lon[1])/60.0)
			if len(lon) > 3:
				d['latitude']+=float(lon[2])/3600.0
			# South is negative.
			if lon[-1] == 'S':
				d['latitude'] = d['latitude']/-1.0

		if line[0:3] == "/zi":
			# Tokenise on double quotes, discarding empty and single-space
			# tokens; what remains is: '/zi ', name, location.
			s0=line.strip().split('"')
			s=[]
			for j in range(len(s0)):
				if s0[j] != '' and s0[j] != ' ':
					s.append(s0[j])
			d['name']=s[1]
			d['location']=s[2]
Exemple #21
0
def _process_file(request, changeset, is_issue):
    '''
    Check the uploaded flat file for usable encodings and correct field
    counts.

    request: request whose FILES['flatfile'] holds the upload
    changeset: passed through to the error and story-type helpers
    is_issue: if true, the first line must be an issue line

    Returns two values.
    If all is correct:
      - a list of the processed lines (which are lists of the values)
      - False for no failure
    If there is some error:
      - the error response / message from the helper that reported it
      - True for having failed
    '''
    # we need a real file to be able to use pythons Universal Newline Support
    tmpfile_handle, tmpfile_name = tempfile.mkstemp(".import")
    for chunk in request.FILES['flatfile'].chunks():
        os.write(tmpfile_handle, chunk)
    os.close(tmpfile_handle)
    tmpfile = open(tmpfile_name, 'U')
    # Keep the handle and path on the request -- presumably so the error
    # handler can clean up the temp file on early returns (TODO confirm).
    request.tmpfile = tmpfile
    request.tmpfile_name = tmpfile_name

    # check if file starts with byte order mark
    if tmpfile.read(2) == BOM_UTF16:
        enc = 'utf-16'
        # use EncodedFile from codecs to get transparent encoding translation
        upload = EncodedFile(tmpfile, enc)
    # otherwise just do as usual
    else:
        upload = tmpfile
        # charset was None in my local tests, not sure if actually useful here
        enc = request.FILES['flatfile'].charset
    # Rewind past the two bytes consumed by the BOM check.
    tmpfile.seek(0)

    lines = []
    empty_line = False
    # process the file into a list of lines and check for length
    for line in upload:
        # see if the line can be decoded
        decoded_line, failure = decode_heuristically(line, enc=enc)
        if failure:
            error_text = 'line %s has unknown file encoding.' % line
            return _handle_import_error(request, changeset, error_text)

        split_line = decoded_line.strip('\n').split('\t')

        # if is_issue is set, the first line should be issue line
        if is_issue and not lines:
            # check number of fields
            if len(split_line) != ISSUE_FIELDS:
                error_text = 'issue line %s has %d fields, it must have %d.' \
                             % (split_line, len(split_line), ISSUE_FIELDS)
                return _handle_import_error(request, changeset, error_text)

        # later lines are story lines
        else:
            # we had an empty line just before
            if empty_line:
                error_text = 'The file includes an empty line.'
                return _handle_import_error(request, changeset, error_text)
            # we have an empty line now, OK if it is the last line
            # (a line without any tab yields exactly one field)
            if len(split_line) == 1:
                empty_line = True
                continue

            # check number of fields
            if len(split_line) != SEQUENCE_FIELDS:
                error_text = 'sequence line %s has %d fields, it must have %d.' \
                    % (split_line, len(split_line), SEQUENCE_FIELDS)
                return _handle_import_error(request, changeset, error_text)

            # check the story_type here, otherwise the sequences before an
            # error would already be added
            response, failure = _find_story_type(request, changeset,
                                                 split_line)
            if failure:
                return response, True

        lines.append(split_line)

    # Success path only: close and delete the temp file and drop the
    # bookkeeping attributes from the request.
    tmpfile.close()
    os.remove(tmpfile_name)
    del request.tmpfile
    del request.tmpfile_name
    return lines, False
Exemple #22
0
class DocGenerator(xmlapp.Application):
    """Event handler set that writes document events as markup to ``self.out``."""

    def __init__(self, out = None):
        """Keep *out* as the output stream, or UTF-8 wrapped stdout if falsy."""
        self.out = out if out else EncodedFile(sys.stdout, "utf-8")

    def handle_pi(self, target, remainder):
        """Write a processing instruction for *target* and *remainder*."""
        self.out.write("<?%s %s?>" % (target, remainder))

    def handle_start_tag(self, name, amap):
        """Write the start tag *name* with every attribute in *amap*."""
        sink = self.out
        sink.write("<" + name)
        for key, raw_value in amap.items():
            escaped = escape_attval(raw_value)
            sink.write(' %s="%s"' % (key, escaped))
        sink.write(">")

    def handle_end_tag(self, name):
        """Write the end tag for *name*."""
        self.out.write("</%s>" % name)

    def handle_ignorable_data(self, data, start_ix, end_ix):
        """Write ``data[start_ix:end_ix]`` after content escaping."""
        escaped = escape_content(data[start_ix:end_ix])
        self.out.write(escaped)

    def handle_data(self, data, start_ix, end_ix):
        """Write ``data[start_ix:end_ix]`` after content escaping."""
        escaped = escape_content(data[start_ix:end_ix])
        self.out.write(escaped)