Example #1
def read_unicode(fn):
    """Read an Unicode file that may encode with utf_16_le, utf_16_be, or utf_8.
    """
    from codecs import BOM_UTF16_LE, BOM_UTF16_BE, BOM_UTF8

    with open(fn, "rb") as in_file:
        bs = in_file.read()

    if bs.startswith(BOM_UTF16_LE):
        us = bs.decode("utf_16_le").lstrip(BOM_UTF16_LE.decode("utf_16_le"))
    elif bs.startswith(BOM_UTF16_BE):
        us = bs.decode("utf_16_be").lstrip(BOM_UTF16_BE.decode("utf_16_be"))
    else:
        us = bs.decode("utf_8").lstrip(BOM_UTF8.decode("utf_8"))

    return us
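A minimal round-trip sketch (hypothetical sample.txt path, assuming read_unicode() above is in scope): write a BOM-prefixed UTF-16-LE file, then read it back.

from codecs import BOM_UTF16_LE

with open("sample.txt", "wb") as out_file:  # hypothetical file name
    out_file.write(BOM_UTF16_LE + "héllo".encode("utf_16_le"))

assert read_unicode("sample.txt") == "héllo"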
Example #2
    def test_import_stops_txt_bom(self):
        if PY3:  # pragma: no cover
            text = (BOM_UTF8.decode('utf-8') + """\
stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,\
location_type,parent_station,stop_timezone
FUR_CREEK_RES,FC,Furnace Creek Resort,,36.425288,-117.133162,A,\
http://example.com/fcr,0,FUR_CREEK_STA,
FUR_CREEK_STA,,Furnace Creek Station,"Our Station",36.425288,-117.133162,A,\
http://example.com,1,,America/Los_Angeles
""")
        else:
            text = (BOM_UTF8 + b"""\
stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,\
location_type,parent_station,stop_timezone
FUR_CREEK_RES,FC,Furnace Creek Resort,,36.425288,-117.133162,A,\
http://example.com/fcr,0,FUR_CREEK_STA,
FUR_CREEK_STA,,Furnace Creek Station,"Our Station",36.425288,-117.133162,A,\
http://example.com,1,,America/Los_Angeles
""")
        stops_txt = StringIO(text)
        Stop.import_txt(stops_txt, self.feed)
        self.assertEqual(Stop.objects.count(), 2)
        station = Stop.objects.get(stop_id='FUR_CREEK_STA')
        stop = Stop.objects.get(stop_id='FUR_CREEK_RES')
        self.assertEqual(stop.parent_station, station)
Example #3
def test_utf8_bom():
    unicode_bom = BOM_UTF8.decode('utf-8')

    module = parso.parse(unicode_bom)
    endmarker = module.children[0]
    assert endmarker.type == 'endmarker'
    assert unicode_bom == endmarker.prefix

    module = parso.parse(unicode_bom + 'foo = 1')
    expr_stmt = module.children[0]
    assert expr_stmt.type == 'expr_stmt'
    assert unicode_bom == expr_stmt.get_first_leaf().prefix
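As a follow-up sketch (assuming parso is installed): because the BOM lands in the first leaf's prefix, the full source round-trips through get_code().

module = parso.parse(unicode_bom + 'foo = 1')
assert module.get_code() == unicode_bom + 'foo = 1'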
Example #4
def bom_prefix_csv(text):
    """
    Prefix CSV text with a Byte-order Marker (BOM).

    The return value needs to be encoded differently so the CSV reader will
    handle the BOM correctly:
    - Python 2 returns a UTF-8 encoded bytestring
    - Python 3 returns unicode text
    """
    if PY3:
        return BOM_UTF8.decode('utf-8') + text
    else:
        return BOM_UTF8 + text.encode('utf-8')
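A usage sketch for the Python 3 branch (hypothetical data): csv.reader does not strip the BOM by itself, so it stays attached to the first header cell unless the consumer removes it.

import csv
import io
from codecs import BOM_UTF8

rows = list(csv.reader(io.StringIO(BOM_UTF8.decode('utf-8') + "a,b\n1,2\n")))
assert rows[0][0] == '\ufeffa'  # the BOM survives in the first cell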
Example #5
    def test_import_bom(self):
        if PY3:  # pragma: no cover
            text = (BOM_UTF8.decode('utf-8') + """\
agency_name,agency_url,agency_timezone
Demo Transit Authority,http://google.com,America/Los_Angeles
""")
        else:
            text = (BOM_UTF8 + b"""\
agency_name,agency_url,agency_timezone
Demo Transit Authority,http://google.com,America/Los_Angeles
""")
        agency_txt = StringIO(text)
        Agency.import_txt(agency_txt, self.feed)
        agency = Agency.objects.get()
        self.assertEqual(agency.agency_id, '')
        self.assertEqual(agency.name, 'Demo Transit Authority')
        self.assertEqual(agency.url, 'http://google.com')
        self.assertEqual(agency.timezone, 'America/Los_Angeles')
        self.assertEqual(agency.lang, '')
        self.assertEqual(agency.phone, '')
        self.assertEqual(agency.fare_url, '')
Example #6
def test_eval_bom(self):
    self.assertEqual(eval(BOM_UTF8 + '"foo"'), 'foo')
    # Decoded as iso-8859-1, the BOM bytes become ordinary characters
    # rather than a BOM, so the parser rejects the source with a
    # SyntaxError (Python 2: eval of a BOM-prefixed bytestring succeeds).
    self.assertRaises(SyntaxError, eval,
                      BOM_UTF8.decode('iso-8859-1') + '"foo"')
Example #7
decode_utf8 = decode_string
encode_utf8 = encode_string

#### CACHE #########################################################################################
# Caching is implemented in URL.download(), which is used by all other downloaders.

import os
import glob
import tempfile
import datetime

from io import open

from codecs import BOM_UTF8
BOM_UTF8 = BOM_UTF8.decode('utf-8')

try:
    MODULE = os.path.dirname(os.path.realpath(__file__))
except:  # __file__ is undefined in frozen or embedded interpreters
    MODULE = ""

TMP = os.path.join(tempfile.gettempdir(), "pattern_web")


def date_now():
    return datetime.datetime.today()


def date_modified(path):
    return datetime.datetime.fromtimestamp(os.stat(path)[8])  # index 8 = st_mtime
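An equivalent without the magic stat index (a sketch, not part of the library):

def date_modified_alt(path):  # hypothetical alternative
    return datetime.datetime.fromtimestamp(os.path.getmtime(path))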
Example #8
from itertools import chain
import os
import sys

try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen

from .__init__ import Graph, Node, Edge, bfs
from .__init__ import WEIGHT, CENTRALITY, EIGENVECTOR, BETWEENNESS

from codecs import BOM_UTF8
if sys.version > "3":
    BOM_UTF8 = BOM_UTF8.decode("utf-8")

    basestring = str

try:
    MODULE = os.path.dirname(os.path.realpath(__file__))
except:
    MODULE = ""

#### COMMONSENSE SEMANTIC NETWORK ########################################

#--- CONCEPT -------------------------------------------------------------


class Concept(Node):
Example #9
try:
    from itertools import zip_longest
except ImportError:
    # Python 2
    from itertools import izip_longest as zip_longest

from codecs import BOM_UTF8

import pytest

import parso

unicode_bom = BOM_UTF8.decode('utf-8')


@pytest.mark.parametrize(('string', 'tokens'), [
    ('', ['']),
    ('#', ['#', '']),
    (' # ', ['# ', '']),
    (' # \n', ['# ', '\n', '']),
    (' # \f\n', ['# ', '\f', '\n', '']),
    ('  \n', ['\n', '']),
    ('  \n ', ['\n', ' ']),
    (' \f ', ['\f', ' ']),
    (' \f ', ['\f', ' ']),
    (' \r\n', ['\r\n', '']),
    ('\\\n', ['\\\n', '']),
    ('\\\r\n', ['\\\r\n', '']),
    ('\t\t\n\t', ['\n', '\t']),
])
def test_simple_prefix_splitting(string, tokens):
Example #10
def __enter__(self):
    with open(self.filename, encoding='utf-8') as infile:
        content = infile.read()
    # content[:1] avoids an IndexError on an empty file
    if content[:1] == BOM_UTF8.decode('utf8'):
        content = content[1:]
    return content
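The manual check above can also be delegated to the codec (a hypothetical helper, not this project's code): the 'utf-8-sig' codec strips a leading BOM during decoding.

def read_without_bom(path):  # hypothetical helper
    with open(path, encoding='utf-8-sig') as infile:
        return infile.read()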
Example #11
import string
from codecs import BOM_UTF8
from collections import namedtuple

from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
                                NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
                                ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
                                FSTRING_END)
from parso._compatibility import py_version
from parso.utils import split_lines


TokenCollection = namedtuple(
    'TokenCollection',
    'pseudo_token single_quoted triple_quoted endpats whitespace '
    'fstring_pattern_map always_break_tokens',
)

BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

_token_collection_cache = {}

if py_version >= 30:
    # Python 3 has str.isidentifier() to check if a char is a valid identifier
    is_identifier = str.isidentifier
else:
    namechars = string.ascii_letters + '_'
    is_identifier = lambda s: s in namechars


def group(*choices, **kwargs):
    capture = kwargs.pop('capture', False)  # Python 2, arrghhhhh :(
    assert not kwargs
Example #12
from codecs import BOM_UTF8
from datetime import datetime, date
from logging import getLogger
import re

from django.contrib.gis.db import models
from django.contrib.gis.db.models.query import GeoQuerySet
from django.db.models.fields.related import ManyToManyField
from django.utils.six import StringIO, text_type, PY3

from multigtfs.compat import get_blank_value, write_text_rows

logger = getLogger(__name__)
re_point = re.compile(r'(?P<name>point)\[(?P<index>\d)\]')
batch_size = 1000
large_queryset_size = 100000
CSV_BOM = BOM_UTF8.decode('utf-8') if PY3 else BOM_UTF8


class BaseQuerySet(GeoQuerySet):
    def populated_column_map(self):
        '''Return the _column_map without unused optional fields'''
        column_map = []
        cls = self.model
        for csv_name, field_pattern in cls._column_map:
            # Separate the local field name from foreign columns
            if '__' in field_pattern:
                field_name = field_pattern.split('__', 1)[0]
            else:
                field_name = field_pattern

            # Handle point fields
Example #13
from __future__ import absolute_import

import itertools as _itertools
import re
import sys
from codecs import BOM_UTF8
from collections import namedtuple
from dataclasses import dataclass
from typing import Dict, Generator, Iterable, Optional, Pattern, Set, Tuple

from libcst._parser.parso.python.token import PythonTokenTypes
from libcst._parser.parso.utils import PythonVersionInfo, split_lines

# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
MAX_UNICODE = "\U0010ffff"
BOM_UTF8_STRING = BOM_UTF8.decode("utf-8")

STRING = PythonTokenTypes.STRING
NAME = PythonTokenTypes.NAME
NUMBER = PythonTokenTypes.NUMBER
OP = PythonTokenTypes.OP
NEWLINE = PythonTokenTypes.NEWLINE
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ASYNC = PythonTokenTypes.ASYNC
AWAIT = PythonTokenTypes.AWAIT
ENDMARKER = PythonTokenTypes.ENDMARKER
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
Example #14
from itertools import zip_longest
from codecs import BOM_UTF8

import pytest

import parso

unicode_bom = BOM_UTF8.decode('utf-8')


@pytest.mark.parametrize(('string', 'tokens'), [
    ('', ['']),
    ('#', ['#', '']),
    (' # ', ['# ', '']),
    (' # \n', ['# ', '\n', '']),
    (' # \f\n', ['# ', '\f', '\n', '']),
    ('  \n', ['\n', '']),
    ('  \n ', ['\n', ' ']),
    (' \f ', ['\f', ' ']),
    (' \f ', ['\f', ' ']),
    (' \r\n', ['\r\n', '']),
    (' \r', ['\r', '']),
    ('\\\n', ['\\\n', '']),
    ('\\\r\n', ['\\\r\n', '']),
    ('\t\t\n\t', ['\n', '\t']),
])
def test_simple_prefix_splitting(string, tokens):
    tree = parso.parse(string)
    leaf = tree.children[0]
    assert leaf.type == 'endmarker'
Example #15
from codecs import BOM_UTF8
from csv import reader, writer
from datetime import datetime, date
from logging import getLogger
import re

from django.contrib.gis.db import models
from django.db.models.fields.related import ManyToManyField
from django.utils.six import StringIO, text_type, PY3

from multigtfs.compat import (get_blank_value, write_text_rows, Manager,
                              QuerySet)

logger = getLogger(__name__)
re_point = re.compile(r'(?P<name>point)\[(?P<index>\d)\]')
batch_size = 1000
CSV_BOM = BOM_UTF8.decode('utf-8') if PY3 else BOM_UTF8


class BaseQuerySet(QuerySet):
    def populated_column_map(self):
        '''Return the _column_map without unused optional fields'''
        column_map = []
        cls = self.model
        for csv_name, field_pattern in cls._column_map:
            # Separate the local field name from foreign columns
            if '__' in field_pattern:
                field_name = field_pattern.split('__', 1)[0]
            else:
                field_name = field_pattern

            # Handle point fields
Example #16
        if os.path.exists(fullname):
            return fullname
    return None


# }}}

# {{{ file encoding detection
# the main idea stolen from Python 3.1's tokenize.py, by Ka-Ping Yee

import re

cookie_re = re.compile("^\s*#.*coding[:=]\s*([-\w.]+)")
from codecs import lookup, BOM_UTF8
if PY3:
    BOM_UTF8 = BOM_UTF8.decode()


def detect_encoding(lines):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, lines,
    an iterable stream of lines.

    It will read a maximum of two lines, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read
    in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263. If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised. If the encoding cookie is an
Example #17
from codecs import BOM_UTF8

import agate
import datetime
import isodate
import json
import dbt.utils
from typing import Iterable, List, Dict, Union, Optional, Any

from dbt.exceptions import RuntimeException


BOM = BOM_UTF8.decode('utf-8')  # '\ufeff'


class ISODateTime(agate.data_types.DateTime):
    def cast(self, d):
        # this is agate.data_types.DateTime.cast with the "clever" bits removed
        # so we only handle ISO8601 stuff
        if isinstance(d, datetime.datetime) or d is None:
            return d
        elif isinstance(d, datetime.date):
            return datetime.datetime.combine(d, datetime.time(0, 0, 0))
        elif isinstance(d, str):
            d = d.strip()
            if d.lower() in self.null_values:
                return None
        try:
            return isodate.parse_datetime(d)
        except:  # noqa
            pass
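A hedged usage sketch exercising only the ISO branch shown above (assuming agate and isodate are installed and the dbt imports resolve):

col = ISODateTime()
assert col.cast('2021-03-04T05:06:07') == datetime.datetime(2021, 3, 4, 5, 6, 7)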
Example #18
from codecs import BOM_UTF8

try:
    # Python 2
    from urllib import urlopen
except ImportError:
    # Python 3
    from urllib.request import urlopen

from .__init__ import Graph, Node, Edge, bfs
from .__init__ import WEIGHT, CENTRALITY, EIGENVECTOR, BETWEENNESS

import os
import sys

try:
    MODULE = os.path.dirname(os.path.realpath(__file__))
except:
    MODULE = ""

if sys.version > "3":
    BOM_UTF8 = str(BOM_UTF8.decode("utf-8"))
else:
    BOM_UTF8 = BOM_UTF8.decode("utf-8")

#### COMMONSENSE SEMANTIC NETWORK ##################################################################

#--- CONCEPT ---------------------------------------------------------------------------------------


class Concept(Node):
    def __init__(self, *args, **kwargs):
        """ A concept in the sematic network.
        """
        Node.__init__(self, *args, **kwargs)
        self._properties = None
Example #19
import sys
from codecs import BOM_UTF8
from collections import namedtuple

from parso.python.token import PythonTokenTypes

INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ENDMARKER = PythonTokenTypes.ENDMARKER
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
FSTRING_END = PythonTokenTypes.FSTRING_END

TokenCollection = namedtuple(
    'TokenCollection',
    'pseudo_token single_quoted triple_quoted endpats whitespace '
    'fstring_pattern_map always_break_tokens',
)

BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

_token_collection_cache = {}

if sys.version_info.major >= 3:
    # Python 3 has str.isidentifier() to check if a char is a valid identifier
    is_identifier = str.isidentifier
else:
    # Python 2 doesn't, but it's not that important anymore and if you tokenize
    # Python 2 code with this, it's still ok. It's just that parsing Python 3
    # code with this function is not 100% correct.
    # This just means that Python 2 code matches a few identifiers too much,
    # but that doesn't really matter.
    def is_identifier(s):
        return True
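A quick sanity check of the Python 3 branch (str.isidentifier):

assert is_identifier('foo')
assert not is_identifier('1foo')  # rejects a leading digit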
Example #20
# Copyright 2017, Jarsa Sistemas, S.A. de C.V.
# License LGPL-3.0 or later (http://www.gnu.org/licenses/lgpl).

import base64
from codecs import BOM_UTF8

from suds.client import Client

from odoo import _, api, models, tools
from odoo.tools.float_utils import float_repr

BOM_UTF8U = BOM_UTF8.decode('UTF-8')
CFDI_SAT_QR_STATE = {
    'No Encontrado': 'not_found',
    'Cancelado': 'cancelled',
    'Vigente': 'valid',
}


class AccountInvoice(models.Model):
    _inherit = 'account.invoice'

    @api.multi
    def generate_xml_attachment(self):
        self.ensure_one()
        if not self.l10n_mx_edi_cfdi:
            return False
        fname = ("%s-%s-MX-Bill-%s.xml" %
                 (self.journal_id.code, self.reference,
                  self.company_id.partner_id.vat or '')).replace('/', '')
        data_attach = {
Example #21
    def import_txt(cls, txt_file, feed, filter_func=None):
        '''Import from the GTFS text file'''

        # Setup the conversion from GTFS to Django Format
        # Conversion functions
        def no_convert(value): return value

        def date_convert(value): return datetime.strptime(value, '%Y%m%d')

        def bool_convert(value): return (value == '1')

        def char_convert(value): return (value or '')

        def null_convert(value): return (value or None)

        def point_convert(value): return (value or 0.0)

        cache = {}

        def default_convert(field):
            def get_value_or_default(value):
                if value == '' or value is None:
                    return field.get_default()
                else:
                    return value
            return get_value_or_default

        def instance_convert(field, feed, rel_name):
            def get_instance(value):
                if value.strip():
                    key1 = "{}:{}".format(field.rel.to.__name__, rel_name)
                    key2 = text_type(value)

                    # Load existing objects
                    if key1 not in cache:
                        pairs = field.rel.to.objects.filter(
                            **{field.rel.to._rel_to_feed: feed}).values_list(
                            rel_name, 'id')
                        cache[key1] = dict((text_type(x), i) for x, i in pairs)

                    # Create new?
                    if key2 not in cache[key1]:
                        kwargs = {
                            field.rel.to._rel_to_feed: feed,
                            rel_name: value}
                        cache[key1][key2] = field.rel.to.objects.create(
                            **kwargs).id
                    return cache[key1][key2]
                else:
                    return None
            return get_instance

        # Check unique fields
        column_names = [c for c, _ in cls._column_map]
        for unique_field in cls._unique_fields:
            assert unique_field in column_names, \
                '{} not in {}'.format(unique_field, column_names)

        # Map of field_name to converters from GTFS to Django format
        val_map = dict()
        name_map = dict()
        point_map = dict()
        for csv_name, field_pattern in cls._column_map:
            # Separate the local field name from foreign columns
            if '__' in field_pattern:
                field_base, rel_name = field_pattern.split('__', 1)
                field_name = field_base + '_id'
            else:
                field_name = field_base = field_pattern
            # Use the field name in the name mapping
            name_map[csv_name] = field_name

            # Is it a point field?
            point_match = re_point.match(field_name)
            if point_match:
                field = None
            else:
                field = cls._meta.get_field_by_name(field_base)[0]

            # Pick a conversion function for the field
            if point_match:
                converter = point_convert
            elif isinstance(field, models.DateField):
                converter = date_convert
            elif isinstance(field, models.BooleanField):
                converter = bool_convert
            elif isinstance(field, models.CharField):
                converter = char_convert
            elif field.rel:
                converter = instance_convert(field, feed, rel_name)
                assert not isinstance(field, models.ManyToManyField)
            elif field.null:
                converter = null_convert
            elif field.has_default():
                converter = default_convert(field)
            else:
                converter = no_convert

            if point_match:
                index = int(point_match.group('index'))
                point_map[csv_name] = (index, converter)
            else:
                val_map[csv_name] = converter

        # Read and convert the source txt
        csv_reader = reader(txt_file)
        unique_line = dict()
        count = 0
        first = True
        extra_counts = defaultdict(int)
        if PY3:  # pragma: no cover
            bom = BOM_UTF8.decode('utf-8')
        else:  # pragma: no cover
            bom = BOM_UTF8
        new_objects = []
        for row in csv_reader:
            if first:
                # Read the columns
                columns = row
                if columns[0].startswith(bom):
                    columns[0] = columns[0][len(bom):]
                first = False
                continue

            if filter_func and not filter_func(zip(columns, row)):
                continue

            # Read a data row
            fields = dict()
            point_coords = [None, None]
            ukey_values = {}
            if cls._rel_to_feed == 'feed':
                fields['feed'] = feed
            for column_name, value in zip(columns, row):
                if column_name not in name_map:
                    val = null_convert(value)
                    if val is not None:
                        fields.setdefault('extra_data', {})[column_name] = val
                        extra_counts[column_name] += 1
                elif column_name in val_map:
                    fields[name_map[column_name]] = val_map[column_name](value)
                else:
                    assert column_name in point_map
                    pos, converter = point_map[column_name]
                    point_coords[pos] = converter(value)

                # Is it part of the unique key?
                if column_name in cls._unique_fields:
                    ukey_values[column_name] = value

            # Join the lat/long into a point
            if point_map:
                assert point_coords[0] and point_coords[1]
                fields['point'] = "POINT(%s)" % (' '.join(point_coords))

            # Is the item unique?
            """
            ukey = tuple(ukey_values.get(u) for u in cls._unique_fields)
            if ukey in unique_line:
                logger.warning(
                    '%s line %d is a duplicate of line %d, not imported.',
                    cls._filename, csv_reader.line_num, unique_line[ukey])
                continue
            else:
                unique_line[ukey] = csv_reader.line_num
            """
            # Create after accumulating a batch
            new_objects.append(cls(**fields))
            if len(new_objects) % batch_size == 0:  # pragma: no cover
                cls.objects.bulk_create(new_objects)
                count += len(new_objects)
                logger.info(
                    "Imported %d %s",
                    count, cls._meta.verbose_name_plural)
                new_objects = []

        # Create remaining objects
        if new_objects:
            cls.objects.bulk_create(new_objects)

        # Take note of extra fields
        if extra_counts:
            extra_columns = feed.meta.setdefault(
                'extra_columns', {}).setdefault(cls.__name__, [])
            for column in columns:
                if column in extra_counts and column not in extra_columns:
                    extra_columns.append(column)
            feed.save()
        # With the duplicate check disabled, unique_line stays empty, so this
        # returns 0 rather than the number of imported rows.
        return len(unique_line)
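A hypothetical call through this pipeline, mirroring the Agency test earlier on the page (it assumes an existing Feed instance named feed):

from io import StringIO

txt = StringIO("agency_name,agency_url,agency_timezone\n"
               "Demo,http://example.com,UTC\n")
Agency.import_txt(txt, feed)  # columns converted via val_map, then bulk-created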
Example #22
            dirname = os.readlink(dirname)
        fullname = os.path.join(dirname, filename)
        if os.path.exists(fullname):
            return fullname
    return None

# }}}

# {{{ file encoding detection
# stolen from Python 3.1's tokenize.py, by Ka-Ping Yee

import re
cookie_re = re.compile("^\s*#.*coding[:=]\s*([-\w.]+)")
from codecs import lookup, BOM_UTF8
if PY3:
    BOM_UTF8 = BOM_UTF8.decode()


def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read
    in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263. If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised. If the encoding cookie is an
Example #23
from codecs import BOM_UTF8


def lstrip_bom(str_, bom=BOM_UTF8.decode('utf-8')):
    # 'utf-8' here, not 'utf-8-sig': the utf-8-sig codec strips the BOM
    # while decoding, which would make the default an empty string and
    # the function a no-op.
    if str_.startswith(bom):
        return str_[len(bom):]
    else:
        return str_
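A quick check of the default BOM argument (the UTF-8 BOM decodes to U+FEFF):

assert lstrip_bom('\ufeffhello') == 'hello'
assert lstrip_bom('hello') == 'hello'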
Example #24
def __enter__(self):
    # a context manager ensures the file handle is closed promptly
    with open(self.filename, encoding="utf-8") as infile:
        content = infile.read()
    if content[:1] == BOM_UTF8.decode("utf8"):
        content = content[1:]
    return content
Example #25
# Fix XML attachments that were not decoded
import oerplib
import argparse
import base64
from lxml import objectify
from codecs import BOM_UTF8
BOM_UTF8U = BOM_UTF8.decode('UTF-8')

PARSER = argparse.ArgumentParser()
PARSER.add_argument("-d", "--db", help="DataBase Name", required=True)
PARSER.add_argument("-r", "--user", help="OpenERP User", required=True)
PARSER.add_argument("-w", "--passwd", help="OpenERP Password", required=True)
PARSER.add_argument("-p", "--port",
                    type=int,
                    help="Port, 8069 for default", default="8069")
PARSER.add_argument("-s", "--server",
                    help="Server IP, 127.0.0.1 for default",
                    default="127.0.0.1")
ARGS = PARSER.parse_args()

if ARGS.db is None or ARGS.user is None or ARGS.passwd is None:
    print "Must be specified DataBase, User and Password"
    quit()

DB_NAME = ARGS.db
USER = ARGS.user
PASSW = ARGS.passwd
SERVER = ARGS.server
PORT = ARGS.port

OERP_CONNECT = oerplib.OERP(SERVER,