def read_unicode(fn):
    """Read a Unicode file that may be encoded as utf_16_le, utf_16_be, or utf_8."""
    from codecs import BOM_UTF16_LE, BOM_UTF16_BE, BOM_UTF8
    with open(fn, "rb") as in_file:
        bs = in_file.read()
    if bs.startswith(BOM_UTF16_LE):
        us = bs.decode("utf_16_le").lstrip(BOM_UTF16_LE.decode("utf_16_le"))
    elif bs.startswith(BOM_UTF16_BE):
        us = bs.decode("utf_16_be").lstrip(BOM_UTF16_BE.decode("utf_16_be"))
    else:
        us = bs.decode("utf_8").lstrip(BOM_UTF8.decode("utf_8"))
    return us
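# Hedged usage sketch for read_unicode() above (Python 3): write a small
# UTF-16-LE file with a BOM, then read it back. The file name
# "bom_example.txt" is illustrative, not taken from the original source.
import codecs

with open("bom_example.txt", "wb") as out_file:
    out_file.write(codecs.BOM_UTF16_LE + "héllo".encode("utf_16_le"))
assert read_unicode("bom_example.txt") == "héllo"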
def test_import_stops_txt_bom(self):
    if PY3:  # pragma: no cover
        text = (BOM_UTF8.decode('utf-8') + """\
stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,\
location_type,parent_station,stop_timezone
FUR_CREEK_RES,FC,Furnace Creek Resort,,36.425288,-117.133162,A,\
http://example.com/fcr,0,FUR_CREEK_STA,
FUR_CREEK_STA,,Furnace Creek Station,"Our Station",36.425288,-117.133162,A,\
http://example.com,1,,America/Los_Angeles
""")
    else:
        text = (BOM_UTF8 + b"""\
stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,\
location_type,parent_station,stop_timezone
FUR_CREEK_RES,FC,Furnace Creek Resort,,36.425288,-117.133162,A,\
http://example.com/fcr,0,FUR_CREEK_STA,
FUR_CREEK_STA,,Furnace Creek Station,"Our Station",36.425288,-117.133162,A,\
http://example.com,1,,America/Los_Angeles
""")
    stops_txt = StringIO(text)
    Stop.import_txt(stops_txt, self.feed)
    self.assertEqual(Stop.objects.count(), 2)
    station = Stop.objects.get(stop_id='FUR_CREEK_STA')
    stop = Stop.objects.get(stop_id='FUR_CREEK_RES')
    self.assertEqual(stop.parent_station, station)
def test_utf8_bom():
    unicode_bom = BOM_UTF8.decode('utf-8')

    module = parso.parse(unicode_bom)
    endmarker = module.children[0]
    assert endmarker.type == 'endmarker'
    assert unicode_bom == endmarker.prefix

    module = parso.parse(unicode_bom + 'foo = 1')
    expr_stmt = module.children[0]
    assert expr_stmt.type == 'expr_stmt'
    assert unicode_bom == expr_stmt.get_first_leaf().prefix
def bom_prefix_csv(text):
    """
    Prefix CSV text with a Byte-order Marker (BOM).

    The return value needs to be encoded differently so the CSV reader
    will handle the BOM correctly:
    - Python 2 returns a UTF-8 encoded bytestring
    - Python 3 returns unicode text
    """
    if PY3:
        return BOM_UTF8.decode('utf-8') + text
    else:
        return BOM_UTF8 + text.encode('utf-8')
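# Hedged usage sketch for bom_prefix_csv() above (Python 3 path only); the
# header and row values are illustrative, not from the original tests.
import csv
import io

prefixed = bom_prefix_csv("name,value\nalpha,1\n")
rows = list(csv.reader(io.StringIO(prefixed)))
# The first column name still carries the BOM ('\ufeffname') unless the
# consuming code strips it, which is what the BOM-aware import tests exercise.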
def test_import_bom(self):
    if PY3:  # pragma: no cover
        text = (BOM_UTF8.decode('utf-8') + """\
agency_name,agency_url,agency_timezone
Demo Transit Authority,http://google.com,America/Los_Angeles
""")
    else:
        text = (BOM_UTF8 + b"""\
agency_name,agency_url,agency_timezone
Demo Transit Authority,http://google.com,America/Los_Angeles
""")
    agency_txt = StringIO(text)
    Agency.import_txt(agency_txt, self.feed)
    agency = Agency.objects.get()
    self.assertEqual(agency.agency_id, '')
    self.assertEqual(agency.name, 'Demo Transit Authority')
    self.assertEqual(agency.url, 'http://google.com')
    self.assertEqual(agency.timezone, 'America/Los_Angeles')
    self.assertEqual(agency.lang, '')
    self.assertEqual(agency.phone, '')
    self.assertEqual(agency.fare_url, '')
def test_eval_bom(self):
    self.assertEqual(eval(BOM_UTF8 + '"foo"'), 'foo')
    # Actual BOM ignored, so causes a SyntaxError
    self.assertRaises(SyntaxError, eval,
                      BOM_UTF8.decode('iso-8859-1') + '"foo"')
decode_utf8 = decode_string
encode_utf8 = encode_string

#### CACHE #########################################################################################
# Caching is implemented in URL.download(), which is used by all other downloaders.

import os
import glob
import tempfile
import datetime

from io import open

from codecs import BOM_UTF8
BOM_UTF8 = BOM_UTF8.decode('utf-8')

try:
    MODULE = os.path.dirname(os.path.realpath(__file__))
except:
    MODULE = ""

TMP = os.path.join(tempfile.gettempdir(), "pattern_web")


def date_now():
    return datetime.datetime.today()


def date_modified(path):
    # os.stat(path)[8] is st_mtime, the last modification time.
    return datetime.datetime.fromtimestamp(os.stat(path)[8])
from itertools import chain

import os
import sys

try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen

from .__init__ import Graph, Node, Edge, bfs
from .__init__ import WEIGHT, CENTRALITY, EIGENVECTOR, BETWEENNESS

from codecs import BOM_UTF8

if sys.version > "3":
    BOM_UTF8 = BOM_UTF8.decode("utf-8")
    basestring = str

try:
    MODULE = os.path.dirname(os.path.realpath(__file__))
except:
    MODULE = ""

#### COMMONSENSE SEMANTIC NETWORK ########################################

#--- CONCEPT -------------------------------------------------------------

class Concept(Node):
try:
    from itertools import zip_longest
except ImportError:
    # Python 2
    from itertools import izip_longest as zip_longest

from codecs import BOM_UTF8

import pytest

import parso

unicode_bom = BOM_UTF8.decode('utf-8')


@pytest.mark.parametrize(('string', 'tokens'), [
    ('', ['']),
    ('#', ['#', '']),
    (' # ', ['# ', '']),
    (' # \n', ['# ', '\n', '']),
    (' # \f\n', ['# ', '\f', '\n', '']),
    (' \n', ['\n', '']),
    (' \n ', ['\n', ' ']),
    (' \f ', ['\f', ' ']),
    (' \f ', ['\f', ' ']),
    (' \r\n', ['\r\n', '']),
    ('\\\n', ['\\\n', '']),
    ('\\\r\n', ['\\\r\n', '']),
    ('\t\t\n\t', ['\n', '\t']),
])
def test_simple_prefix_splitting(string, tokens):
def __enter__(self):
    with open(self.filename, encoding='utf-8') as infile:
        content = infile.read()
    if content[0] == BOM_UTF8.decode('utf8'):
        content = content[1:]
    return content
from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
                                NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
                                ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
                                FSTRING_END)
from parso._compatibility import py_version
from parso.utils import split_lines

TokenCollection = namedtuple(
    'TokenCollection',
    'pseudo_token single_quoted triple_quoted endpats whitespace '
    'fstring_pattern_map always_break_tokens',
)

BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

_token_collection_cache = {}

if py_version >= 30:
    # Python 3 has str.isidentifier() to check if a char is a valid identifier
    is_identifier = str.isidentifier
else:
    namechars = string.ascii_letters + '_'
    is_identifier = lambda s: s in namechars


def group(*choices, **kwargs):
    capture = kwargs.pop('capture', False)  # Python 2, arrghhhhh :(
    assert not kwargs
from codecs import BOM_UTF8  # needed for CSV_BOM below
from datetime import datetime, date
from logging import getLogger
import re

from django.contrib.gis.db import models
from django.contrib.gis.db.models.query import GeoQuerySet
from django.db.models.fields.related import ManyToManyField
from django.utils.six import StringIO, text_type, PY3

from multigtfs.compat import get_blank_value, write_text_rows

logger = getLogger(__name__)

re_point = re.compile(r'(?P<name>point)\[(?P<index>\d)\]')
batch_size = 1000
large_queryset_size = 100000
CSV_BOM = BOM_UTF8.decode('utf-8') if PY3 else BOM_UTF8


class BaseQuerySet(GeoQuerySet):
    def populated_column_map(self):
        '''Return the _column_map without unused optional fields'''
        column_map = []
        cls = self.model
        for csv_name, field_pattern in cls._column_map:
            # Separate the local field name from foreign columns
            if '__' in field_pattern:
                field_name = field_pattern.split('__', 1)[0]
            else:
                field_name = field_pattern

            # Handle point fields
from __future__ import absolute_import

import itertools as _itertools
import re
import sys
from codecs import BOM_UTF8
from collections import namedtuple
from dataclasses import dataclass
from typing import Dict, Generator, Iterable, Optional, Pattern, Set, Tuple

from libcst._parser.parso.python.token import PythonTokenTypes
from libcst._parser.parso.utils import PythonVersionInfo, split_lines

# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
MAX_UNICODE = "\U0010ffff"
BOM_UTF8_STRING = BOM_UTF8.decode("utf-8")

STRING = PythonTokenTypes.STRING
NAME = PythonTokenTypes.NAME
NUMBER = PythonTokenTypes.NUMBER
OP = PythonTokenTypes.OP
NEWLINE = PythonTokenTypes.NEWLINE
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ASYNC = PythonTokenTypes.ASYNC
AWAIT = PythonTokenTypes.AWAIT
ENDMARKER = PythonTokenTypes.ENDMARKER
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
from itertools import zip_longest
from codecs import BOM_UTF8

import pytest

import parso

unicode_bom = BOM_UTF8.decode('utf-8')


@pytest.mark.parametrize(('string', 'tokens'), [
    ('', ['']),
    ('#', ['#', '']),
    (' # ', ['# ', '']),
    (' # \n', ['# ', '\n', '']),
    (' # \f\n', ['# ', '\f', '\n', '']),
    (' \n', ['\n', '']),
    (' \n ', ['\n', ' ']),
    (' \f ', ['\f', ' ']),
    (' \f ', ['\f', ' ']),
    (' \r\n', ['\r\n', '']),
    (' \r', ['\r', '']),
    ('\\\n', ['\\\n', '']),
    ('\\\r\n', ['\\\r\n', '']),
    ('\t\t\n\t', ['\n', '\t']),
])
def test_simple_prefix_splitting(string, tokens):
    tree = parso.parse(string)
    leaf = tree.children[0]
    assert leaf.type == 'endmarker'
from codecs import BOM_UTF8  # needed for CSV_BOM below
from csv import reader, writer
from datetime import datetime, date
from logging import getLogger
import re

from django.contrib.gis.db import models
from django.db.models.fields.related import ManyToManyField
from django.utils.six import StringIO, text_type, PY3

from multigtfs.compat import (
    get_blank_value, write_text_rows, Manager, QuerySet)

logger = getLogger(__name__)

re_point = re.compile(r'(?P<name>point)\[(?P<index>\d)\]')
batch_size = 1000
CSV_BOM = BOM_UTF8.decode('utf-8') if PY3 else BOM_UTF8


class BaseQuerySet(QuerySet):
    def populated_column_map(self):
        '''Return the _column_map without unused optional fields'''
        column_map = []
        cls = self.model
        for csv_name, field_pattern in cls._column_map:
            # Separate the local field name from foreign columns
            if '__' in field_pattern:
                field_name = field_pattern.split('__', 1)[0]
            else:
                field_name = field_pattern

            # Handle point fields
    if os.path.exists(fullname):
        return fullname
    return None

# }}}


# {{{ file encoding detection

# the main idea stolen from Python 3.1's tokenize.py, by Ka-Ping Yee

import re
cookie_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)")

from codecs import lookup, BOM_UTF8
if PY3:
    BOM_UTF8 = BOM_UTF8.decode()


def detect_encoding(lines):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, lines,
    an iterable stream of lines.

    It will read a maximum of two lines, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263. If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised. If the encoding cookie is an
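# Hedged usage sketch for the lines-based detect_encoding() above; the byte
# lines are illustrative. With a PEP 263 cookie on the first line, the
# reported encoding should be "utf-8", and the second value holds the raw
# byte lines that were consumed while looking for it.
source_lines = [b"# -*- coding: utf-8 -*-\n", b"x = 1\n"]
encoding, consumed = detect_encoding(iter(source_lines))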
from codecs import BOM_UTF8

import agate
import datetime
import isodate
import json
import dbt.utils
from typing import Iterable, List, Dict, Union, Optional, Any

from dbt.exceptions import RuntimeException

BOM = BOM_UTF8.decode('utf-8')  # '\ufeff'


class ISODateTime(agate.data_types.DateTime):
    def cast(self, d):
        # this is agate.data_types.DateTime.cast with the "clever" bits removed
        # so we only handle ISO8601 stuff
        if isinstance(d, datetime.datetime) or d is None:
            return d
        elif isinstance(d, datetime.date):
            return datetime.datetime.combine(d, datetime.time(0, 0, 0))
        elif isinstance(d, str):
            d = d.strip()
            if d.lower() in self.null_values:
                return None
            try:
                return isodate.parse_datetime(d)
            except:  # noqa
                pass
# Python 3
from urllib.request import urlopen

from .__init__ import Graph, Node, Edge, bfs
from .__init__ import WEIGHT, CENTRALITY, EIGENVECTOR, BETWEENNESS

import os
import sys
from codecs import BOM_UTF8  # needed for the decode below

try:
    MODULE = os.path.dirname(os.path.realpath(__file__))
except:
    MODULE = ""

if sys.version > "3":
    BOM_UTF8 = str(BOM_UTF8.decode("utf-8"))
else:
    BOM_UTF8 = BOM_UTF8.decode("utf-8")

#### COMMONSENSE SEMANTIC NETWORK ##################################################################

#--- CONCEPT ---------------------------------------------------------------------------------------

class Concept(Node):

    def __init__(self, *args, **kwargs):
        """ A concept in the semantic network.
        """
        Node.__init__(self, *args, **kwargs)
        self._properties = None
from itertools import chain

import os
import sys

try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen

from .__init__ import Graph, Node, Edge, bfs
from .__init__ import WEIGHT, CENTRALITY, EIGENVECTOR, BETWEENNESS

from codecs import BOM_UTF8

if sys.version > "3":
    BOM_UTF8 = BOM_UTF8.decode("utf-8")
    basestring = str

try:
    MODULE = os.path.dirname(os.path.realpath(__file__))
except:
    MODULE = ""

#### COMMONSENSE SEMANTIC NETWORK ########################################

#--- CONCEPT -------------------------------------------------------------

class Concept(Node):

    def __init__(self, *args, **kwargs):
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ENDMARKER = PythonTokenTypes.ENDMARKER
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
FSTRING_END = PythonTokenTypes.FSTRING_END

TokenCollection = namedtuple(
    'TokenCollection',
    'pseudo_token single_quoted triple_quoted endpats whitespace '
    'fstring_pattern_map always_break_tokens',
)

BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

_token_collection_cache = {}

if sys.version_info.major >= 3:
    # Python 3 has str.isidentifier() to check if a char is a valid identifier
    is_identifier = str.isidentifier
else:
    # Python 2 doesn't, but it's not that important anymore and if you tokenize
    # Python 2 code with this, it's still ok. It's just that parsing Python 3
    # code with this function is not 100% correct.
    # This just means that Python 2 code matches a few identifiers too much,
    # but that doesn't really matter.
    def is_identifier(s):
        return True
# Copyright 2017, Jarsa Sistemas, S.A. de C.V.
# License LGPL-3.0 or later (http://www.gnu.org/licenses/lgpl).

import base64
from codecs import BOM_UTF8

from suds.client import Client

from odoo import _, api, models, tools
from odoo.tools.float_utils import float_repr

BOM_UTF8U = BOM_UTF8.decode('UTF-8')
CFDI_SAT_QR_STATE = {
    'No Encontrado': 'not_found',
    'Cancelado': 'cancelled',
    'Vigente': 'valid',
}


class AccountInvoice(models.Model):
    _inherit = 'account.invoice'

    @api.multi
    def generate_xml_attachment(self):
        self.ensure_one()
        if not self.l10n_mx_edi_cfdi:
            return False
        fname = ("%s-%s-MX-Bill-%s.xml" % (
            self.journal_id.code, self.reference,
            self.company_id.partner_id.vat or '')).replace('/', '')
        data_attach = {
def import_txt(cls, txt_file, feed, filter_func=None):
    '''Import from the GTFS text file'''

    # Setup the conversion from GTFS to Django Format
    # Conversion functions
    def no_convert(value): return value

    def date_convert(value): return datetime.strptime(value, '%Y%m%d')

    def bool_convert(value): return (value == '1')

    def char_convert(value): return (value or '')

    def null_convert(value): return (value or None)

    def point_convert(value): return (value or 0.0)

    cache = {}

    def default_convert(field):
        def get_value_or_default(value):
            if value == '' or value is None:
                return field.get_default()
            else:
                return value
        return get_value_or_default

    def instance_convert(field, feed, rel_name):
        def get_instance(value):
            if value.strip():
                key1 = "{}:{}".format(field.rel.to.__name__, rel_name)
                key2 = text_type(value)

                # Load existing objects
                if key1 not in cache:
                    pairs = field.rel.to.objects.filter(
                        **{field.rel.to._rel_to_feed: feed}).values_list(
                        rel_name, 'id')
                    cache[key1] = dict((text_type(x), i) for x, i in pairs)

                # Create new?
                if key2 not in cache[key1]:
                    kwargs = {
                        field.rel.to._rel_to_feed: feed,
                        rel_name: value}
                    cache[key1][key2] = field.rel.to.objects.create(
                        **kwargs).id
                return cache[key1][key2]
            else:
                return None
        return get_instance

    # Check unique fields
    column_names = [c for c, _ in cls._column_map]
    for unique_field in cls._unique_fields:
        assert unique_field in column_names, \
            '{} not in {}'.format(unique_field, column_names)

    # Map of field_name to converters from GTFS to Django format
    val_map = dict()
    name_map = dict()
    point_map = dict()
    for csv_name, field_pattern in cls._column_map:
        # Separate the local field name from foreign columns
        if '__' in field_pattern:
            field_base, rel_name = field_pattern.split('__', 1)
            field_name = field_base + '_id'
        else:
            field_name = field_base = field_pattern
        # Use the field name in the name mapping
        name_map[csv_name] = field_name

        # Is it a point field?
        point_match = re_point.match(field_name)
        if point_match:
            field = None
        else:
            field = cls._meta.get_field_by_name(field_base)[0]

        # Pick a conversion function for the field
        if point_match:
            converter = point_convert
        elif isinstance(field, models.DateField):
            converter = date_convert
        elif isinstance(field, models.BooleanField):
            converter = bool_convert
        elif isinstance(field, models.CharField):
            converter = char_convert
        elif field.rel:
            converter = instance_convert(field, feed, rel_name)
            assert not isinstance(field, models.ManyToManyField)
        elif field.null:
            converter = null_convert
        elif field.has_default():
            converter = default_convert(field)
        else:
            converter = no_convert

        if point_match:
            index = int(point_match.group('index'))
            point_map[csv_name] = (index, converter)
        else:
            val_map[csv_name] = converter

    # Read and convert the source txt
    csv_reader = reader(txt_file)
    unique_line = dict()
    count = 0
    first = True
    extra_counts = defaultdict(int)
    if PY3:  # pragma: no cover
        bom = BOM_UTF8.decode('utf-8')
    else:  # pragma: no cover
        bom = BOM_UTF8
    new_objects = []
    for row in csv_reader:
        if first:
            # Read the columns
            columns = row
            if columns[0].startswith(bom):
                columns[0] = columns[0][len(bom):]
            first = False
            continue

        if filter_func and not filter_func(zip(columns, row)):
            continue

        # Read a data row
        fields = dict()
        point_coords = [None, None]
        ukey_values = {}
        if cls._rel_to_feed == 'feed':
            fields['feed'] = feed
        for column_name, value in zip(columns, row):
            if column_name not in name_map:
                val = null_convert(value)
                if val is not None:
                    fields.setdefault('extra_data', {})[column_name] = val
                    extra_counts[column_name] += 1
            elif column_name in val_map:
                fields[name_map[column_name]] = val_map[column_name](value)
            else:
                assert column_name in point_map
                pos, converter = point_map[column_name]
                point_coords[pos] = converter(value)

            # Is it part of the unique key?
            if column_name in cls._unique_fields:
                ukey_values[column_name] = value

        # Join the lat/long into a point
        if point_map:
            assert point_coords[0] and point_coords[1]
            fields['point'] = "POINT(%s)" % (' '.join(point_coords))

        # Is the item unique?
        """
        ukey = tuple(ukey_values.get(u) for u in cls._unique_fields)
        if ukey in unique_line:
            logger.warning(
                '%s line %d is a duplicate of line %d, not imported.',
                cls._filename, csv_reader.line_num, unique_line[ukey])
            continue
        else:
            unique_line[ukey] = csv_reader.line_num
        """

        # Create after accumulating a batch
        new_objects.append(cls(**fields))
        if len(new_objects) % batch_size == 0:  # pragma: no cover
            cls.objects.bulk_create(new_objects)
            count += len(new_objects)
            logger.info(
                "Imported %d %s",
                count, cls._meta.verbose_name_plural)
            new_objects = []

    # Create remaining objects
    if new_objects:
        cls.objects.bulk_create(new_objects)

    # Take note of extra fields
    if extra_counts:
        extra_columns = feed.meta.setdefault(
            'extra_columns', {}).setdefault(cls.__name__, [])
        for column in columns:
            if column in extra_counts and column not in extra_columns:
                extra_columns.append(column)
        feed.save()
    return len(unique_line)
    dirname = os.readlink(dirname)
    fullname = os.path.join(dirname, filename)
    if os.path.exists(fullname):
        return fullname
    return None

# }}}


# {{{ file encoding detection

# stolen from Python 3.1's tokenize.py, by Ka-Ping Yee

import re
cookie_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)")

from codecs import lookup, BOM_UTF8
if PY3:
    BOM_UTF8 = BOM_UTF8.decode()


def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263. If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised. If the encoding cookie is an
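# Hedged usage sketch for the readline-based detect_encoding() above; the
# source bytes are illustrative.
import io

buf = io.BytesIO(b"# -*- coding: utf-8 -*-\nx = 1\n")
encoding, consumed = detect_encoding(buf.readline)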
def lstrip_bom(str_, bom=BOM_UTF8.decode('utf-8')):
    # Note: decoding with 'utf-8' (not 'utf-8-sig') keeps the BOM character
    # '\ufeff', so there is actually something to strip; the 'utf-8-sig'
    # codec would swallow the BOM and yield an empty default.
    if str_.startswith(bom):
        return str_[len(bom):]
    else:
        return str_
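# Hedged usage sketch for lstrip_bom() above; the sample strings are illustrative.
assert lstrip_bom("\ufeffstop_id,stop_name") == "stop_id,stop_name"
assert lstrip_bom("no bom here") == "no bom here"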
def __enter__(self):
    content = open(self.filename, encoding="utf-8").read()
    if content[0] == BOM_UTF8.decode("utf8"):
        content = content[1:]
    return content
# Fix the XML when it is not decoded
import oerplib
import argparse
import base64
from lxml import objectify
from codecs import BOM_UTF8

BOM_UTF8U = BOM_UTF8.decode('UTF-8')

PARSER = argparse.ArgumentParser()
PARSER.add_argument("-d", "--db", help="DataBase Name", required=True)
PARSER.add_argument("-r", "--user", help="OpenERP User", required=True)
PARSER.add_argument("-w", "--passwd", help="OpenERP Password", required=True)
PARSER.add_argument("-p", "--port", type=int,
                    help="Port, 8069 for default", default="8069")
PARSER.add_argument("-s", "--server",
                    help="Server IP, 127.0.0.1 for default",
                    default="127.0.0.1")
ARGS = PARSER.parse_args()

if ARGS.db is None or ARGS.user is None or ARGS.passwd is None:
    print("DataBase, User and Password must be specified")
    quit()

DB_NAME = ARGS.db
USER = ARGS.user
PASSW = ARGS.passwd
SERVER = ARGS.server
PORT = ARGS.port
OERP_CONNECT = oerplib.OERP(SERVER,