Exemple #1
0
def itertriples(path):
    '''
    Iterates over an N-triples file returning triples as tuples.

    Comment lines (starting with ``#``) and blank lines are skipped. The
    underlying file is closed when the iteration ends, including when an
    already open file object was passed in.

    Parameters
    ----------
    path : string or file
        path to N-triples file (a name ending in ``.gz`` is opened with
        GzipFile), or an already open file object.

    Yields
    ------
    tuple
        ``(subject, predicate, object)`` for every non-comment, non-blank
        line.
    '''
    if isinstance(path, file):
        ntfile = path
    elif path.endswith('.gz'):
        ntfile = GzipFile(path)
    else:
        ntfile = open(path)
    ntfile = EncodedFile(ntfile, 'utf-8')
    with closing(ntfile):
        for line in ntfile:
            if line.startswith('#'):
                continue
            # remove trailing newline and dot
            line = line.strip().strip('.').strip()
            if not line:
                # A blank line has nothing to split; without this guard it
                # would be reported as the bogus triple ('', '', '').
                continue
            # the first two whitespaces are guaranteed to split the line
            # correctly. The trailing part may be a property containing
            # whitespaces, so using str.split is not viable.
            s1 = line.find(' ')
            s2 = line.find(' ', s1 + 1)
            yield line[:s1], line[s1 + 1:s2], line[s2 + 1:]
Exemple #2
0
    def form_valid(self, form):
        """Import the uploaded Kindle clippings file for the current user.

        The upload is decoded as UTF-8 (undecodable bytes are ignored) and
        handed to ``kindle_clipping_parser``. For every parsed book a
        ``Book`` is fetched or created, and for every clipping a
        ``Clipping`` is fetched or created. Outcome messages are attached to
        the request through the ``messages`` framework.

        Returns whatever the parent class' ``form_valid`` returns, in every
        case (missing file, parse failure, or completed import).
        """
        if 'clippings_file' not in self.request.FILES:
            messages.add_message(self.request, messages.ERROR, _('Could not process the uploaded file'))
            return super(UploadMyClippingsFileView, self).form_valid(form)

        try:
            clippings_file = EncodedFile(self.request.FILES['clippings_file'], data_encoding='utf-8', errors='ignore')
            clippings_file_content = clippings_file.read()
            clips = kindle_clipping_parser.get_clips_from_text(clippings_file_content)
        except Exception:
            # Best-effort boundary: any parser failure is logged with its
            # traceback and reported to the user instead of crashing the view.
            logger.error('Error parsing a clippings file.', exc_info=True)
            messages.add_message(
                self.request,
                messages.ERROR,
                _('Couldn\'t process your Clippings. No clippings have been imported. The developer is informed, please try again in a couple of days!')
            )
        else:
            user = self.request.user
            num_books = 0
            num_clippings = 0
            # NOTE: counts the books whose import loop aborted, not the
            # individual clippings that were lost.
            errors = 0
            for book, clippings in clips.items():
                book, created = Book.objects.get_or_create(
                    user=user,
                    title=book,
                )
                if created:
                    num_books += 1
                try:
                    for clip_content in clippings:
                        __, created = Clipping.objects.get_or_create(
                            user=user,
                            content=clip_content,
                            defaults={
                                'book': book,
                            }
                        )
                        if created:
                            num_clippings += 1
                except Exception:
                    errors += 1
                    logger.error('Error importing a clipping.', exc_info=True)

            if errors > 0:
                messages.add_message(
                    self.request,
                    messages.ERROR,
                    # Translate the template first and interpolate afterwards;
                    # formatting inside _() would look up the already
                    # interpolated text, which never matches a catalogue entry.
                    _('{num_clippings} clippings could not be imported').format(num_clippings=errors)
                )

            messages.add_message(
                self.request,
                messages.SUCCESS,
                _('Successfully imported {num_clippings} new clippings from {num_books} books').format(
                    num_clippings=num_clippings,
                    num_books=num_books,
                )
            )

        return super(UploadMyClippingsFileView, self).form_valid(form)
Exemple #3
0
    def __call__(self):
        """Locate the index file, configure the markup and parse the file.

        The markup object gets the directory containing the index file and
        this object's factory. The index file is then opened, recoded from
        ``self.encoding`` to UTF-8, and passed to ``self._parse``.
        """
        index_path = self._find_index_file(self.dir, self.filenames)
        self.markup.dir = self.dir.opendir(dirname(index_path))
        self.markup.factory = self.factory

        raw_index = self.dir.open(index_path, 'rU')
        with EncodedFile(raw_index, 'utf8', self.encoding) as file:
            self._parse(file)
Exemple #4
0
 def asfile(self):
     """Return the processed rows of ``self.fpath`` as an in-memory CSV.

     The source file is read as UTF-8 (undecodable bytes ignored), passed
     to ``self.process_result`` and the resulting rows are written with
     ``csv.writer`` into a new ``StringIO`` buffer, which is returned. The
     header returned by ``process_result`` is not written.
     """
     f = StringIO.StringIO()
     w = csv.writer(f)
     # Keep the source open while the rows are consumed (they may be
     # produced lazily) and make sure it is closed afterwards.
     with EncodedFile(open(self.fpath, 'rb'),
                      'utf-8', 'utf-8', 'ignore') as src:
         _header, rows = self.process_result(src)
         for row in rows:
             w.writerow(row)
     return f
Exemple #5
0
 def tables(self):
     """Return one table object per entry of ``self.infiles``.

     ``self.infiles`` yields ``(fname, fpath)`` pairs; each pair is passed
     to ``self.table_factory`` and the results are returned as a list in
     the same order.

     No file is opened here: the previous implementation opened every
     input file and built a csv reader that was never used nor closed,
     leaking one file handle per table.
     """
     return [self.table_factory(fname, fpath)
             for fname, fpath in self.infiles]
Exemple #6
0
 def rollover(self):
     """Move the in-memory buffer contents into a temporary file.

     Does nothing when the object reports that it has already rolled
     over. Otherwise the current contents are copied into a UTF-8
     ``EncodedFile`` wrapping a ``TemporaryFile``, the previous position is
     restored, the old buffer is closed and the new file becomes
     ``self._buffer``.
     """
     if self._rolled:
         return
     position = self.buffer.tell()
     spill = EncodedFile(TemporaryFile(), data_encoding='utf-8')
     spill.write(self.buffer.getvalue())
     spill.seek(position)
     self.buffer.close()
     self._buffer = spill
Exemple #7
0
 def test_unicode_read(self):
     # One ISO-8859-1 data line with four comma separated names is read
     # back through the csv reader and compared against unicode literals.
     source = StringIO("Martin von Löwis,"
                       "Marc André Lemburg,"
                       "Guido van Rossum,"
                       "François Pinard\r\n")
     recoded = EncodedFile(source, data_encoding='iso-8859-1')
     rows = csv.reader(recoded)
     expected_row = [
         u"Martin von Löwis",
         u"Marc André Lemburg",
         u"Guido van Rossum",
         u"François Pinard",
     ]
     self.assertEqual(list(rows), [expected_row])
Exemple #8
0
    def form_valid(self, form):
        """Import clippings from an uploaded plain-text file.

        The upload is decoded as UTF-8 (undecodable bytes are ignored) and
        split into clippings by ``plaintext_parser``. When the form carries
        a ``book_title`` the matching ``Book`` is fetched or created (with
        the form's ``author`` as default author name) and attached to newly
        created clippings. Outcome messages are attached to the request
        through the ``messages`` framework.

        Decoding and parsing run inside the same error boundary as the
        database work, so a malformed upload produces the error message
        instead of an unhandled exception.

        Returns whatever the parent class' ``form_valid`` returns.
        """
        if 'clippings_file' not in self.request.FILES:
            messages.add_message(self.request, messages.ERROR, _('Could not process the uploaded file'))
            return super(UploadTextFileClippingsView, self).form_valid(form)

        user = self.request.user
        num_clippings = 0

        try:
            clippings_file = EncodedFile(
                self.request.FILES['clippings_file'],
                'utf-8',
                errors='ignore',
            )
            clippings_file_content = clippings_file.read()
            clips = plaintext_parser.get_clips_from_text(clippings_file_content)

            book_title = form.cleaned_data.get('book_title', None)
            book = None
            if book_title:
                book, __ = Book.objects.get_or_create(
                    user=user,
                    title=book_title,
                    defaults={
                        'author_name': form.cleaned_data.get('author', None),
                    },
                )

            for clip_content in clips:
                __, created = Clipping.objects.get_or_create(
                    user=user,
                    content=clip_content,
                    defaults={
                        'book': book,
                    }
                )
                if created:
                    num_clippings += 1
        except Exception:
            # Best-effort boundary: log with traceback and tell the user.
            logger.error('Error processing a clippings file.', exc_info=True)
            messages.add_message(
                self.request,
                messages.ERROR,
                _('Couldn\'t process all clippings. The developer is informed, please try again in a couple of days!')
            )
        else:
            messages.add_message(
                self.request,
                messages.SUCCESS,
                _('Successfully uploaded {num_clippings} clippings.').format(
                    num_clippings=num_clippings,
                )
            )
        return super(UploadTextFileClippingsView, self).form_valid(form)
 def asfile(self):
     """Return ``self.rows`` as CSV in an ASCII ``EncodedFile`` buffer.

     Returns ``None`` when ``self.rows`` is ``None``. Rows that raise
     ``UnicodeEncodeError`` while being written are reported on stdout
     and skipped.
     """
     # Check first so no buffer or writer is built when there is nothing
     # to write.
     if self.rows is None:
         return None
     outfile = EncodedFile(StringIO.StringIO(), 'ascii', errors='ignore')
     cwrite = csv.writer(outfile)
     for row in self.rows:
         try:
             cwrite.writerow(row)
         except UnicodeEncodeError:
             # Ignore bad names, grumble grumble
             print("row error")
     return outfile
Exemple #10
0
 def post(self, request):
     """Import the rows of the uploaded CSV file.

     The file stored under ``FILE_HEADER`` is read with ``csv.DictReader``
     after recoding it from ``utf-8-sig`` to ``utf8``. Each row is printed,
     passed through ``self.fix`` and saved through ``BanamexSerializer``
     when it validates. Responds 201 after processing, or 400 when the
     file is missing or falsy.
     """
     files = request.FILES
     upload = files[FILE_HEADER] if FILE_HEADER in files else None
     if not upload:
         return Response({"msg": "WRONG FILE"},
                         status=status.HTTP_400_BAD_REQUEST)

     rows = csv.DictReader(EncodedFile(upload, 'utf8', "utf-8-sig"))
     for row in rows:
         print(row)
         serializer = BanamexSerializer(data=self.fix(row))
         if serializer.is_valid():
             serializer.save()
     return Response({"msg": "OK"}, status=status.HTTP_201_CREATED)
Exemple #11
0
    def from_file(cls, path, zip_file):
        """Initialise from a file.

        path is the file path for the map inside the zip, without extension.
        zip_file is either a ZipFile or FakeZip object.

        When the ``.p2c`` file cannot be parsed (``KeyValError``) a warning
        is logged and an instance with placeholder title and description
        is still returned, so the puzzle can be backed up.
        """
        # Some P2Cs may have non-ASCII characters in descriptions, so we
        # need to read it as bytes and convert to utf-8 ourselves - zips
        # don't convert encodings automatically for us.
        try:
            with zip_open_bin(zip_file, path + '.p2c') as file:
                props = Property.parse(
                    # Decode the P2C as UTF-8, substituting undecodable
                    # bytes. We're only using it for display purposes, so
                    # that should be sufficient.
                    EncodedFile(
                        file,
                        data_encoding='utf-8',
                        errors='replace',
                    ),
                    path,
                )
        except KeyValError:
            # Silently fail if we can't parse the file. That way it's still
            # possible to backup.
            # The path must be interpolated through a %s placeholder: passing
            # it as an extra argument to a message without one makes the
            # logging module fail while formatting the record.
            LOGGER.warning(
                'Failed parsing puzzle file "%s"!', path, exc_info=True)
            props = Property('portal2_puzzle', [])
            title = None
            desc = _(
                'Failed to parse this puzzle file. It can still be backed up.')
        else:
            props = props.find_key('portal2_puzzle', [])
            title = props['title', None]
            desc = props['description', _('No description found.')]

        if title is None:
            # Fall back to the bare file name as a display title.
            title = '<' + path.rsplit('/', 1)[-1] + '.p2c>'

        return cls(
            filename=os.path.basename(path),
            zip_file=zip_file,
            title=title,
            desc=desc,
            is_coop=srctools.conv_bool(props['coop', '0']),
            create_time=Date(props['timestamp_created', '']),
            mod_time=Date(props['timestamp_modified', '']),
        )
Exemple #12
0
def safe_text_dupfile(f, mode, default_encoding="UTF8"):
    """Return an open text file object duplicating ``f``.

    When ``f`` exposes a file descriptor, the descriptor is duplicated and
    reopened unbuffered in binary mode. When it does not and ``f`` already
    looks like a text stream, ``f`` itself is returned. In all other cases
    the file object is wrapped in an ``EncodedFile`` using the encoding of
    ``f`` or, failing that, ``default_encoding``.
    """
    source_encoding = getattr(f, "encoding", None)
    try:
        descriptor = f.fileno()
    except Exception:
        opened_as_text = "b" not in getattr(f, "mode", "")
        if opened_as_text and hasattr(f, "encoding"):
            # we seem to have a text stream, let's just use it
            return f
        return EncodedFile(f, source_encoding or default_encoding)

    duplicate = os.dup(descriptor)
    binary_mode = mode if "b" in mode else mode + "b"
    unbuffered = os.fdopen(duplicate, binary_mode, 0)  # no buffering
    return EncodedFile(unbuffered, source_encoding or default_encoding)
Exemple #13
0
def main():
    """Command line entry point.

    Parses the arguments, prints a reminder that the input must be sorted,
    loads the namespace codes, prepares the destination directory and runs
    the two processing passes over the N-Triples file, reporting timing on
    stderr.

    Exits with status 2 (through ``parser.error``) when no destination is
    given, with status 1 when the destination exists but is not a
    directory, and with status 0 on a broken pipe.
    """
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('ns_file',
                        metavar='namespaces',
                        help='tab-separated list of namespace codes')
    parser.add_argument('nt_file', metavar='ntriples', help='N-Triples file')
    parser.add_argument('-p',
                        '--properties',
                        action='store_true',
                        help='print properties')
    parser.add_argument('-D', '--destination', help='destination path')
    args = parser.parse_args()
    if args.destination is None:
        # -D has no default; without this check the path expansion below
        # fails with an obscure error on None.
        parser.error('a destination path is required (-D/--destination)')
    # The parenthesised single-argument form prints the same text whether
    # print is a statement or a function.
    print('')
    print('WARNING: the n-triples file must be already sorted by source,'
          ' destination!')
    print('')
    global namespaces
    namespaces = NodesIndex.readns(args.ns_file)
    sys.stdout = EncodedFile(sys.stdout, 'utf-8')
    # expand destination path, check it is not an existing file, create it in
    # case it does not exist
    args.destination = os.path.expanduser(os.path.expandvars(args.destination))
    if os.path.exists(
            args.destination) and not os.path.isdir(args.destination):
        sys.stderr.write('error: not a directory: '
                         '{}\n'.format(args.destination))
        sys.exit(1)
    elif not os.path.exists(args.destination):
        os.mkdir(args.destination)
        sys.stderr.write('info: created {}\n'.format(args.destination))
    try:
        tic = time()
        vertexmap, num_triples = _first_pass(args.nt_file, args.properties,
                                             args.destination)
        _second_pass(args.nt_file, vertexmap, num_triples, args.properties,
                     args.destination)
        toc = time()
        etime = timedelta(seconds=round(toc - tic))
        speed = num_triples / (toc - tic)
        sys.stderr.write('info: {:d} triples processed in {} '
                         '({:.2f} triple/s)\n'.format(num_triples, etime,
                                                      speed))
    except IOError as e:
        if e.errno == errno.EPIPE:  # broken pipe
            sys.exit(0)
        raise
Exemple #14
0
    def form_valid(self, form):
        """Import the uploaded clippings file for the current user.

        The upload is read through a UTF-8 ``EncodedFile`` and parsed with
        ``get_clips_from_text``. For every parsed title a ``Book`` is
        fetched or created, and every clipping is fetched or created for
        that book. Both counters include entries that already existed. A
        success or error message is attached to the request, and the parent
        class' ``form_valid`` result is returned.
        """
        uploads = self.request.FILES
        if 'clippings_file' not in uploads:
            messages.add_message(self.request, messages.ERROR,
                                 _('Could not process the uploaded file'))
            return super(UploadMyClippingsFileView, self).form_valid(form)

        try:
            raw_text = EncodedFile(uploads['clippings_file'], 'utf-8').read()
            clips_by_title = get_clips_from_text(raw_text)
            owner = self.request.user
            book_total = 0
            clipping_total = 0
            for title, book_clippings in clips_by_title.items():
                book, _created = Book.objects.get_or_create(
                    user=owner,
                    title=title,
                )
                book_total += 1
                for text in book_clippings:
                    Clipping.objects.get_or_create(
                        user=owner,
                        book=book,
                        content=text,
                    )
                    clipping_total += 1
        except Exception as exc:
            trace_text = traceback.format_exc()
            logger.error(
                f'Error processing a clippings file.\n{exc}\n{trace_text}')
            failure_text = _(
                'Couldn\'t process your Clippings. The developer is informed, please try again in a couple of days!'
            )
            messages.add_message(self.request, messages.ERROR, failure_text)
        else:
            success_template = _(
                'Successfully uploaded {num_clippings} clippings from {num_books} books'
            )
            messages.add_message(
                self.request, messages.SUCCESS,
                success_template.format(
                    num_clippings=clipping_total,
                    num_books=book_total,
                ))

        return super(UploadMyClippingsFileView, self).form_valid(form)
 def convert_to_tags(self):
     """
     Read in the file one line at a time. Get the important info, between
     [:16]. Check if this info matches a dictionary entry. If it does, call
     the appropriate function.
     The functions that are called:
         a text function for text
         an open function for open tags
         an open with attribute function for tags with attributes
         an empty with attribute function for tags that are empty but have
         attributes.
         a closed function for closed tags.
         an empty tag function.

     When a UTF conversion was requested or a bad encoding was flagged, the
     output is afterwards re-written through an ``EncodedFile`` so that it
     ends up as UTF-8 or US-ASCII.
     """
     self.__initiate_values()
     # First pass: the handlers looked up in the state dictionary produce
     # the output. The output file object is stored on the instance
     # (``as self.__write_obj``), presumably so the handlers can write to it.
     with open(self.__write_to, 'w') as self.__write_obj:
         self.__write_dec()
         with open(self.__file, 'r') as read_obj:
             for line in read_obj:
                 # The first 16 characters of a line select the handler.
                 self.__token_info = line[:16]
                 action = self.__state_dict.get(self.__token_info)
                 # Lines whose token has no handler are silently skipped.
                 if action is not None:
                     action(line)
     # convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
     if self.__convert_utf or self.__bad_encoding:
         copy_obj = copy.Copy(bug_handler=self.__bug_handler)
         # The freshly written output becomes the input of the re-encoding
         # pass below.
         copy_obj.rename(self.__write_to, self.__file)
         file_encoding = "utf-8"
         if self.__bad_encoding:
             file_encoding = "us-ascii"
         with open(self.__file, 'r') as read_obj:
             with open(self.__write_to, 'w') as write_obj:
                 # Data written here is treated as ``self.__encoding`` and
                 # stored as ``file_encoding``; characters that cannot be
                 # converted are replaced.
                 write_objenc = EncodedFile(write_obj, self.__encoding,
                                            file_encoding, 'replace')
                 for line in read_obj:
                     write_objenc.write(line)
     copy_obj = copy.Copy(bug_handler=self.__bug_handler)
     if self.__copy:
         # Keep a copy of this stage's output under a fixed name.
         copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
     # NOTE(review): ``rename`` is followed by removing the source path, so
     # the helper presumably copies rather than moves - confirm against
     # ``copy.Copy.rename``.
     copy_obj.rename(self.__write_to, self.__file)
     os.remove(self.__write_to)
Exemple #16
0
def load_snippets_from_txt_file(txt_file, snippet_count, book_id):
    """Collect ``snippet_count`` distinct snippets from a text file.

    A random byte offset between 10% and 90% of the file size is chosen,
    the (possibly partial) line at that offset is discarded and up to two
    following lines are inspected. The first one that is at least
    ``MIN_SNIPPET_SIZE`` long is stored as ``(text, position, book_id)``.
    This repeats until enough snippets have been gathered.
    """
    total_bytes = os.path.getsize(txt_file.name)

    collected = set()
    reader = EncodedFile(txt_file.file, 'utf-8', errors='ignore')
    while len(collected) < snippet_count:
        offset = random.randint(total_bytes / 10, 9 * total_bytes / 10)
        reader.seek(offset)
        # The cursor may have landed in the middle of a line, so the first
        # line read is thrown away.
        guarded_readline(reader)

        position = reader.tell()
        attempts_left = 2
        while attempts_left:
            attempts_left -= 1
            candidate = guarded_readline(reader)
            if len(candidate) < MIN_SNIPPET_SIZE:
                # Too short: remember where the next line starts.
                position = reader.tell()
                continue
            text = unicode(candidate, encoding='utf-8', errors='ignore')
            if VERBOSE:
                print("{0} : {1}".format(txt_file.name, position))
            collected.add((text.strip(), position, book_id))
            break

    return collected
import os
import csv
import datetime
from codecs import EncodedFile

from Tkinter import Tk
from tkFileDialog import askopenfilename

# Script body: ask the user for a CSV file and rewrite it as
# 'importThis.csv' with a fixed header row.
# NOTE(review): the snippet appears to be cut off inside the ``while`` loop
# at the end (no increment of ``rad`` and no row is written) - confirm
# against the complete script.
print("Select file..")

Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
filename = askopenfilename() # show an "Open" dialog box and return the path to the selected file
print("Executing..")


# Both files are wrapped so that data stored as ISO-8859-1 on disk is
# exchanged with the csv module as UTF-8.
with EncodedFile(open(filename, 'rb'),'utf-8','iso8859-1') as input,EncodedFile(open('importThis.csv', 'wb'),'utf-8','iso8859-1') as output:
    #import file
    reader = csv.reader(input, delimiter=',', quotechar='"')
    writer = csv.writer(output, delimiter=',', quoting=csv.QUOTE_ALL, quotechar='"')

    # Drop the first row of the input; the fixed header below is written
    # in its place.
    reader.next()
    Header=["date","departure_airport","departure_time","arrival_airport","arrival_time","aircraft_type","aircraft_registration","pic_name","total_time","night","single_engine_vfr","single_engine_ifr","multi_engine_vfr","multi_engine_ifr","pic","co_pilot","multi_pilot","instructor","dual","simulator","ldgs_day","ldgs_night","remarks"]
    #8-18
    writer.writerow(Header)
    for row in reader:
        # ``rad`` is the index of the column currently being converted.
        rad=8
        #row 8-18 is time in seconds in FL need to be HH:MM
        # NOTE(review): the loop bound covers indexes 8..19 while the
        # comments mention 8-18 - confirm the intended range.
        while rad<20:
            tid = ''
            if(row[rad] != ''):
                # Seconds converted to the string form of a timedelta.
                tid = str(datetime.timedelta(seconds=int(row[int(rad)])))
Exemple #18
0
 def __init__(self, out=None):
     """Store the output stream.

     A falsy ``out`` is replaced by ``sys.stdout`` wrapped in a UTF-8
     ``EncodedFile``.
     """
     self.out = out if out else EncodedFile(sys.stdout, "utf-8")
Exemple #19
0
def _astrolog_angle(field, negative_suffix):
    """Convert an Astrolog coordinate such as ``5:24:00E`` to degrees.

    The last character of ``field`` is the hemisphere letter; the result
    is negated when it equals ``negative_suffix``. Seconds are only taken
    into account when they form their own ``:`` separated component.
    """
    parts = field.split(':')
    # Move the hemisphere letter into its own trailing element and keep
    # only the first two characters of the component it was glued to.
    parts.append(parts[-1][-1])
    parts[-2] = parts[-2][0:2]
    angle = float(parts[0]) + (float(parts[1]) / 60.0)
    if len(parts) > 3:
        angle += float(parts[2]) / 3600.0
    if parts[-1] == negative_suffix:
        angle = angle / -1.0
    return angle


def getAstrolog32(filename):
    """Parse an Astrolog / Astrolog32 chart info file.

    Examples of the supported input:

        @0102  ; Astrolog chart info.
        /qb 6 23 1972  3:00:00 ST -1:00   5:24:00E 43:18:00N
        /zi "Zinedine Zidane" "Marseille"

        @0102  ; Astrolog32 chart info.

        ; Date is in American format: month day year.

        /qb 10 27 1980 10:20:00 ST -1:00  14:39'00E 50:11'00N
        /zi "Honzik" "Brandys nad Labem"

    The file is read as Latin-1 and recoded to UTF-8. ``/qb`` lines supply
    month, day, year, hour, minute, second, timezone, longitude and
    latitude; ``/zi`` lines supply name and location.

    Returns a one-element list holding the dictionary of parsed values.
    """
    d = {}
    h = open(filename)
    f = EncodedFile(h, "utf-8", "latin-1")
    try:
        lines = f.readlines()
    finally:
        # Closing the wrapper closes the underlying file, also when
        # reading fails.
        f.close()

    for line in lines:
        if line[0:3] == "/qb":
            # Fields are separated by runs of spaces; drop the empty pieces.
            s = [tok for tok in line.strip().split(' ') if tok != '']
            d['month'] = s[1]
            d['day'] = s[2]
            d['year'] = s[3]
            d['hour'], d['minute'], d['second'] = 0, 0, 0
            # The time may carry fewer than three components; missing ones
            # keep the 0 default.
            for key, value in zip(('hour', 'minute', 'second'),
                                  s[4].split(':')):
                d[key] = value

            #timezone
            tz = s[6].split(':')
            tz_hours = float(tz[0])
            tz_minutes = float(tz[1]) / 60.0
            if tz_hours < 0:
                # A negative zone is stored with the opposite sign. The
                # minutes belong to the same side as the hours, so they
                # are added to the magnitude ("-5:30" -> 5.5, previously
                # 4.5).
                d['timezone'] = -tz_hours + tz_minutes
            else:
                d['timezone'] = tz_hours + tz_minutes
            #longitude
            d['longitude'] = _astrolog_angle(s[7], 'W')
            #latitude
            d['latitude'] = _astrolog_angle(s[8], 'S')

        if line[0:3] == "/zi":
            # Quoted values: keep the pieces that are neither empty nor a
            # single separating space.
            s = [part for part in line.strip().split('"')
                 if part != '' and part != ' ']
            d['name'] = s[1]
            d['location'] = s[2]
    return [d]
Exemple #20
0
                                          file, line)
    else:
        s = warnings.formatwarning(message, category, filename, lineno, line)
        logger = logging.getLogger("py.warnings")
        if not logger.handlers:
            if hasattr(sys.stderr, "isatty") and sys.stderr.isatty():
                handler = logging.StreamHandler()  # Logs to stderr by default
            else:
                handler = logging.NullHandler()
            logger.addHandler(handler)
        log(s.strip(), fn=logger.warning)


# Replace the standard warnings hook with this module's ``showwarning``
# function.
warnings.showwarning = showwarning

# Module-level in-memory log buffer: a StringIO wrapped as UTF-8, with
# characters that cannot be converted being replaced.
logbuffer = EncodedFile(StringIO(), "UTF-8", errors="replace")


def wx_log(logwindow, msg):
    """Forward ``msg`` to the log window when it is visible.

    The message is only forwarded while the module-level log buffer
    reports a non-zero position.
    """
    # A zero position means the log buffer has been emptied, in which case
    # our log message is already included.
    if logwindow.IsShownOnScreen() and logbuffer.tell():
        logwindow.Log(msg)


class DummyLogger():
    def critical(self, msg, *args, **kwargs):
        pass

    def debug(self, msg, *args, **kwargs):
Exemple #21
0
 def test_decode_error_dictreader(self):
     """DictReader must honour the requested error-handling mode."""
     source = EncodedFile(StringIO('name,height,weight\nLöwis,2,3'),
                          data_encoding='iso-8859-1')
     rows = csv.DictReader(source, encoding='ascii', errors='ignore')
     first_row = list(rows)[0]
     self.assertEqual(first_row['name'], 'Lwis')
Exemple #22
0
def _process_file(request, changeset, is_issue):
    '''
    checks the file useable encodings and correct lengths
    returns two values
    if all correct:
      - a list of the processed lines (which are lists of the values)
      - False for no failure
    if some error:
      - error message
      - True for having failed

    The upload stored under request.FILES['flatfile'] is copied to a
    temporary file. The open file and its name are stored on the request
    as request.tmpfile / request.tmpfile_name; they are only closed,
    removed and detached again here when processing succeeds.
    '''
    # we need a real file to be able to use pythons Universal Newline Support
    tmpfile_handle, tmpfile_name = tempfile.mkstemp(".import")
    for chunk in request.FILES['flatfile'].chunks():
        os.write(tmpfile_handle, chunk)
    os.close(tmpfile_handle)
    tmpfile = open(tmpfile_name, 'U')
    # NOTE(review): the error paths below return without closing or
    # removing the temporary file; presumably _handle_import_error cleans
    # up through these request attributes - confirm.
    request.tmpfile = tmpfile
    request.tmpfile_name = tmpfile_name

    # check if file starts with byte order mark
    if tmpfile.read(2) == BOM_UTF16:
        enc = 'utf-16'
        # use EncodedFile from codecs to get transparent encoding translation
        upload = EncodedFile(tmpfile, enc)
    # otherwise just do as usual
    else:
        upload = tmpfile
        # charset was None in my local tests, not sure if actually useful here
        enc = request.FILES['flatfile'].charset
    # rewind after the two bytes read for the byte order mark check
    tmpfile.seek(0)

    lines = []
    # set once a line without any tab has been seen; such a line is only
    # accepted as the very last line of the file
    empty_line = False
    # process the file into a list of lines and check for length
    for line in upload:
        # see if the line can be decoded
        decoded_line, failure = decode_heuristically(line, enc=enc)
        if failure:
            error_text = 'line %s has unknown file encoding.' % line
            return _handle_import_error(request, changeset, error_text)

        # values are tab separated
        split_line = decoded_line.strip('\n').split('\t')

        # if is_issue is set, the first line should be issue line
        if is_issue and not lines:
            # check number of fields
            if len(split_line) != ISSUE_FIELDS:
                error_text = 'issue line %s has %d fields, it must have %d.' \
                             % (split_line, len(split_line), ISSUE_FIELDS)
                return _handle_import_error(request, changeset, error_text)

        # later lines are story lines
        else:
            # we had an empty line just before
            if empty_line:
                error_text = 'The file includes an empty line.'
                return _handle_import_error(request, changeset, error_text)
            # we have an empty line now, OK if it is the last line
            if len(split_line) == 1:
                empty_line = True
                continue

            # check number of fields
            if len(split_line) != SEQUENCE_FIELDS:
                error_text = 'sequence line %s has %d fields, it must have %d.' \
                    % (split_line, len(split_line), SEQUENCE_FIELDS)
                return _handle_import_error(request, changeset, error_text)

            # check here for story_type, otherwise sequences up to an error
            # will be be added
            response, failure = _find_story_type(request, changeset,
                                                 split_line)
            if failure:
                return response, True

        lines.append(split_line)

    # success: release the temporary file and detach it from the request
    tmpfile.close()
    os.remove(tmpfile_name)
    del request.tmpfile
    del request.tmpfile_name
    return lines, False
Exemple #23
0
 def test_decode_error(self):
     """Readers must honour the requested error-handling mode."""
     source = EncodedFile(StringIO('Löwis,2,3'), data_encoding='iso-8859-1')
     rows = csv.reader(source, encoding='ascii', errors='ignore')
     first_row = list(rows)[0]
     self.assertEqual(first_row[0], 'Lwis')
Exemple #24
0
def _encode_wrap(f):
    """Return ``f`` wrapped in an ``EncodedFile`` using UTF-8."""
    wrapped = EncodedFile(f, 'utf-8')
    return wrapped
Exemple #25
0
 def buffer(self):
     """Return the underlying buffer, creating it on first access.

     The buffer is a ``BytesIO`` wrapped in a UTF-8 ``EncodedFile`` and is
     cached on the instance as ``_buffer``.
     """
     try:
         current = self._buffer
     except AttributeError:
         self._buffer = EncodedFile(BytesIO(), data_encoding='utf-8')
         current = self._buffer
     return current
Exemple #26
0
 def select(self):
     """Return the data rows of the CSV file at ``self.fpath``.

     The file is read as UTF-8 (undecodable bytes ignored). Its first
     line is skipped and the remaining lines are parsed with
     ``csv.reader``. An empty file yields an empty list. The file is
     closed before returning.
     """
     with EncodedFile(open(self.fpath, 'rb'),
                      'utf-8', 'utf-8', 'ignore') as src_file:
         # Skip the first line; the default keeps an empty file from
         # raising StopIteration.
         next(src_file, None)
         return list(csv.reader(src_file))
Exemple #27
0
 def columns(self):
     """Describe the columns named in the first row of ``self.fpath``.

     The file is read as UTF-8 (undecodable bytes ignored) and its first
     CSV row supplies the column names. Returns a list of
     ``{'name': ..., 'datatype': Text}`` dictionaries, which is empty for
     an empty file. The file is closed before returning.
     """
     with EncodedFile(open(self.fpath, 'rb'),
                      'utf-8', 'utf-8', 'ignore') as f:
         # Only the first row is needed; the default covers an empty file.
         header = next(csv.reader(f), [])
     return [{'name': x, 'datatype': Text} for x in header]