Example #1
0
def import_versions_from_file(csv_filename, columns):
    """
    Import the versions in the columns listed in `columns`
    :param columns: zero-based list of column numbers with a new version in them
    :return:
    """
    csv.field_size_limit(sys.maxsize)
    with open(csv_filename, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        rows = [row for row in reader]
    return _import_versions_from_csv(rows, columns)
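A quick usage sketch (the file name and column indices are hypothetical, and `_import_versions_from_csv` is assumed to be defined elsewhere in the same module):

# Import the new versions found in the third and fourth columns of the CSV.
results = import_versions_from_file("versions.csv", columns=[2, 3])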
Example #3
0
def getCSVReader(datafile, delimiter):
    '''
    @summary: Get a CSV reader that can handle encoding
    '''
    f = None
    unicodecsv.field_size_limit(sys.maxsize)
    try:
        f = open(datafile, 'rb')
        reader = unicodecsv.reader(f, delimiter=delimiter, encoding=ENCODING)
    except Exception as e:
        raise Exception('Failed to read or open {}, ({})'.format(
            datafile, str(e)))
    # Return the open file handle alongside the reader so the caller can close it.
    return reader, f
Example #4
0
def load_csvgz_in_chunks(file_name, saving_function):
    """Load data from a gzipped CSV and pass it to a save function
    in chunks."""
    csv.field_size_limit(sys.maxsize)
    chunk_size = 3000
    with gzip.open(file_name, "r") as f:
        stored_stream = []
        reader = csv.DictReader(f)
        for num, row in enumerate(reader):
            stored_stream.append(row)
            if (num % chunk_size) == 0 and num != 0:
                saving_function(stored_stream)
                stored_stream = []
        # Flush any rows left over after the last full chunk.
        if stored_stream:
            saving_function(stored_stream)
    return True
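A minimal usage sketch, assuming a gzipped CSV on disk whose parsed rows you simply want to collect (the file name is hypothetical):

# Each chunk of up to ~3000 parsed rows is handed to the callback in turn.
collected = []
load_csvgz_in_chunks("export.csv.gz", collected.extend)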
Example #5
0
def getCSVWriter(datafile, delimiter, doAppend=True):
    '''
    @summary: Get a CSV writer that can handle encoding
    '''
    unicodecsv.field_size_limit(sys.maxsize)
    if doAppend:
        mode = 'ab'
    else:
        mode = 'wb'

    try:
        f = open(datafile, mode)
        writer = unicodecsv.writer(f, delimiter=delimiter, encoding=ENCODING)

    except Exception as e:
        raise Exception('Failed to read or open {}, ({})'.format(
            datafile, str(e)))
    # Return the open file handle alongside the writer so the caller can close it.
    return writer, f
Example #6
0
def export_version_csv(index, version_list):
    assert isinstance(index, AbstractIndex)
    assert isinstance(version_list, list) or isinstance(
        version_list, VersionSet)
    assert all(isinstance(v, Version) for v in version_list)

    csv.field_size_limit(sys.maxsize)

    output = io.BytesIO()
    writer = csv.writer(output)

    # write header data
    writer.writerow(["Index Title"] + [index.title for _ in version_list])
    writer.writerow(["Version Title"] + [v.versionTitle for v in version_list])
    writer.writerow(["Language"] + [v.language for v in version_list])
    writer.writerow(["Version Source"] +
                    [v.versionSource for v in version_list])
    writer.writerow(["Version Notes"] +
                    [getattr(v, "versionNotes", "") for v in version_list])

    section_refs = index.all_section_refs()

    for section_ref in section_refs:
        segment_refs = section_ref.all_subrefs()
        seg_vers = {}

        # set blank array for version data
        for ref in segment_refs:
            seg_vers[ref.normal()] = []

        # populate each version
        for version in version_list:
            section = section_ref.text(vtitle=version.versionTitle,
                                       lang=version.language).text
            for ref in segment_refs:
                if ref.sections[-1] > len(section):
                    seg_vers[ref.normal()] += [""]
                else:
                    seg_vers[ref.normal()] += [section[ref.sections[-1] - 1]]

        # write lines for each section
        for ref in segment_refs:
            writer.writerow([ref.normal()] + seg_vers[ref.normal()])

    return output.getvalue()
Example #7
0
def file_reader(path):
    colDict = dict()
    row = None
    index = None
    maxInt = sys.maxsize
    decrement = True

    while decrement:
        # decrease the maxInt value by factor 10
        # as long as the OverflowError occurs.

        decrement = False
        try:
            unicodecsv.field_size_limit(maxInt)

            colList = list()
            with open(path, 'r') as fr:
                data = unicodecsv.reader(fr,
                                         delimiter=C.DELIMITER,
                                         encoding=C.ENCODING,
                                         errors='replace')
                # print C.ENCODING
                for header in data.next():
                    colList.append({header: list()})
                for index, row in enumerate(data):
                    # print row
                    for i, field in enumerate(row):
                        colList[i][colList[i].keys()[0]].append(
                            field[1:].strip())
            colDict = {k: v for d in colList for k, v in d.items()}
        except OverflowError:
            maxInt = int(maxInt / 10)
            decrement = True
        except IndexError:
            # print row
            index = index + 2
            print str(D.datetime.now()) + ">>" + path.split(
                '\\')[-1] + ">>File is not proper>>Row number:" + str(index)
        except Exception as e:
            print str(
                D.datetime.now()) + ">>" + path.split('\\')[-1] + ">>" + str(e)
    return colDict
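The retry loop above (and the commented-out variant in Example #16) works around platforms where `sys.maxsize` does not fit in a C long and `field_size_limit` raises OverflowError. A standalone sketch of the same idea as a helper; the name `set_max_field_size_limit` is illustrative, not from the original code:

import csv
import sys

def set_max_field_size_limit():
    """Raise the CSV field size limit as high as the platform allows."""
    max_int = sys.maxsize
    while True:
        try:
            csv.field_size_limit(max_int)
            return max_int
        except OverflowError:
            # sys.maxsize overflows a C long on some platforms (e.g. Windows),
            # so back off by a factor of 10 until the call succeeds.
            max_int = int(max_int / 10)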
Example #9
0
def export_merged_csv(index, lang=None):
    assert isinstance(index, Index)
    assert lang in ["en", "he"]

    csv.field_size_limit(sys.maxsize)

    output = io.BytesIO()
    writer = csv.writer(output)

    # write header data
    writer.writerow(["Index Title"] + [index.title])
    writer.writerow(["Version Title"] + ["merged"])
    writer.writerow(["Language"] + [lang])
    writer.writerow(["Version Source"] + ["-"])
    writer.writerow(["Version Notes"] + ["-"])

    section_refs = index.all_section_refs()

    for section_ref in section_refs:
        segment_refs = section_ref.all_subrefs()
        seg_vers = {}

        # set blank array for version data
        for ref in segment_refs:
            seg_vers[ref.normal()] = []

        # populate each version
        section = section_ref.text(lang=lang, exclude_copyrighted=True).text
        for ref in segment_refs:
            if ref.sections[-1] > len(section):
                seg_vers[ref.normal()] += [""]
            else:
                seg_vers[ref.normal()] += [section[ref.sections[-1] - 1]]

        # write lines for each section
        for ref in segment_refs:
            writer.writerow([ref.normal()] + seg_vers[ref.normal()])

    return output.getvalue()
Example #11
0
import sys
from io import BytesIO

import six
import unicodecsv

from rows.plugins.utils import (
    create_table,
    get_filename_and_fobj,
    ipartition,
    serialize,
)

sniffer = unicodecsv.Sniffer()
unicodecsv.field_size_limit(sys.maxsize)


def fix_dialect(dialect):
    if not dialect.doublequote and dialect.escapechar is None:
        dialect.doublequote = True

    if dialect.quoting == unicodecsv.QUOTE_MINIMAL and dialect.quotechar == "'":
        # Python csv's Sniffer seems to detect a wrong quotechar when
        # quoting is minimal
        dialect.quotechar = '"'


if six.PY2:

    def discover_dialect(sample, encoding=None, delimiters=(b",", b";", b"\t", b"|")):
Example #12
0
#
#				python AddStudentCleaning.py Albany NY AlbanyNY_ForStudentsV1_ashley.dta 1 1930
#
# Note:			Student file MUST BE ON RHEA SERVER in the "studentcleaned" directory
#
#				ex:	"/LatestCities/1930/studentcleaned"

import os, sys, subprocess
import unicodecsv as csv
import pandas as pd
import numpy as np
import re
import pickle
import fuzzyset

csv.field_size_limit(sys.maxsize)

# These capture information from the command prompt
c = sys.argv[1]
s = sys.argv[2]
student_file = sys.argv[3]
version = sys.argv[4]
year = sys.argv[5]

#c = "St Louis"
#s = "MO"
#student_file = "StLouisMO_ForStudentsV4_rush.dta"
#version = 6
#year = 1930

c_spaces = c
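The header comment documents the expected invocation; a small hypothetical guard that fails fast when the positional arguments are missing could look like this:

# Hypothetical: validate the command line described in the header comment.
if len(sys.argv) < 6:
    sys.exit("usage: AddStudentCleaning.py CITY STATE STUDENT_FILE VERSION YEAR")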
Example #13
0
def load_authority_file(cursor, path_to_authority_files, filename, auth_file_to_entity_concept_mapping):
    print filename.upper()    

    start = time()
    value_types = models.ValueTypes.objects.all()
    filepath = os.path.join(path_to_authority_files, filename)
    unicodecsv.field_size_limit(sys.maxint)
    errors = []
    lookups = Lookups()

    #create nodes for each authority document file and relate them to the authority document node in the concept schema
    auth_doc_file_name = str(filename)
    display_file_name = string.capwords(auth_doc_file_name.replace('_',' ').replace('AUTHORITY DOCUMENT.csv', '').strip())
    if auth_doc_file_name.upper() != 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.CSV':
        top_concept = Concept()
        top_concept.id = str(uuid.uuid4())
        top_concept.nodetype = 'Concept'       
        top_concept.legacyoid = auth_doc_file_name
        top_concept.addvalue({'value':display_file_name, 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
        lookups.add_relationship(source='00000000-0000-0000-0000-000000000001', type='hasTopConcept', target=top_concept.id)

    else:
        top_concept = Concept().get(id = '00000000-0000-0000-0000-000000000005')
        top_concept.legacyoid = 'ARCHES RESOURCE CROSS-REFERENCE RELATIONSHIP TYPES.E32.csv'

    lookups.add_lookup(concept=top_concept, rownum=0)
    
    try:
        with open(filepath, 'rU') as f:
            rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID','PREFLABEL','ALTLABELS','PARENTCONCEPTID','CONCEPTTYPE','PROVIDER'], 
                encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
            rows.next() # skip header row
            for row in rows:              
                try:
                    if 'MISSING' in row:
                        raise Exception('The row wasn\'t parsed properly. Missing %s' % (row['MISSING']))
                    else:
                        legacyoid = row[u'CONCEPTID']
                        concept = Concept()
                        concept.id = legacyoid if is_uuid(legacyoid) == True else str(uuid.uuid4())
                        concept.nodetype = 'Concept'# if row[u'CONCEPTTYPE'].upper() == 'INDEX' else 'Collection'
                        concept.legacyoid = row[u'CONCEPTID']
                        concept.addvalue({'value':row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'prefLabel', 'category': 'label'})
                        if row['CONCEPTTYPE'].lower() == 'collector':
                            concept.addvalue({'value':row[u'PREFLABEL'], 'language': settings.LANGUAGE_CODE, 'type': 'collector', 'category': 'label'})
                        if row[u'ALTLABELS'] != '':
                            altlabel_list = row[u'ALTLABELS'].split(';')
                            for altlabel in altlabel_list:
                                concept.addvalue({'value':altlabel, 'language': settings.LANGUAGE_CODE, 'type': 'altLabel', 'category': 'label'})    
                        
                        parent_concept_id = lookups.get_lookup(legacyoid=row[u'PARENTCONCEPTID']).id
                        lookups.add_relationship(source=parent_concept_id, type='narrower', target=concept.id, rownum=rows.line_num)
                        # don't add a member relationship between a top concept and its children
                        if parent_concept_id != top_concept.id: 
                            lookups.add_relationship(source=parent_concept_id, type='member', target=concept.id, rownum=rows.line_num)
                        
                        # add the member relationship from the E55 type (typically) to their top members
                        if auth_doc_file_name in auth_file_to_entity_concept_mapping and row[u'PARENTCONCEPTID'] == auth_doc_file_name:
                            for entitytype_info in auth_file_to_entity_concept_mapping[auth_doc_file_name]:
                                lookups.add_relationship(source=entitytype_info['ENTITYTYPE_CONCEPTID'], type='member', target=concept.id, rownum=rows.line_num)

                        if row[u'PARENTCONCEPTID'] == '' or (row[u'CONCEPTTYPE'].upper() != 'INDEX' and row[u'CONCEPTTYPE'].upper() != 'COLLECTOR'):
                            raise Exception('The row has invalid values.')

                        lookups.add_lookup(concept=concept, rownum=rows.line_num)    
                        
                except Exception as e:
                    errors.append('ERROR in row %s: %s' % (rows.line_num, str(e)))           
    
    except UnicodeDecodeError as e:
        errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))
    
    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (filename))
        errors.append('\n\n\n\n')

    try:
        # try and open the values file if it exists
        if exists(filepath.replace('.csv', '.values.csv')):
            with open(filepath.replace('.csv', '.values.csv'), 'rU') as f:
                rows = unicodecsv.DictReader(f, fieldnames=['CONCEPTID','VALUE','VALUETYPE','PROVIDER'], 
                    encoding='utf-8-sig', delimiter=',', restkey='ADDITIONAL', restval='MISSING')
                rows.next() # skip header row
                for row in rows:
                    try:
                        if 'ADDITIONAL' in row:
                            raise Exception('The row wasn\'t parsed properly. Additional fields found %s.  Add quotes to values that have commas in them.' % (row['ADDITIONAL']))
                        else:
                            row_valuetype = row[u'VALUETYPE'].strip()
                            if row_valuetype not in value_types.values_list('valuetype', flat=True): 
                                valuetype = models.ValueTypes()
                                valuetype.valuetype = row_valuetype
                                valuetype.category = 'undefined'
                                valuetype.namespace = 'arches'
                                valuetype.save()
                            
                            value_types = models.ValueTypes.objects.all()
                            concept = lookups.get_lookup(legacyoid=row[u'CONCEPTID'])
                            category = value_types.get(valuetype=row_valuetype).category
                            concept.addvalue({'value':row[u'VALUE'], 'type': row[u'VALUETYPE'], 'category': category})

                    except Exception as e:
                        errors.append('ERROR in row %s (%s): %s' % (rows.line_num, str(e), row))
    
    except UnicodeDecodeError as e:
        errors.append('ERROR: Make sure the file is saved with UTF-8 encoding\n%s\n%s' % (str(e), traceback.format_exc()))
    except Exception as e:
        errors.append('ERROR: %s\n%s' % (str(e), traceback.format_exc()))            
        
    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (filename.replace('.csv', '.values.csv')))
        errors.append('\n\n\n\n')


    # insert and index the concepts
    for key in lookups.lookup:
        try:
            lookups.lookup[key]['concept'].save()
        except Exception as e:
            errors.append('ERROR in row %s (%s):\n%s\n' % (lookups.lookup[key]['rownum'], str(e), traceback.format_exc()))
        
        lookups.lookup[key]['concept'].index(scheme=top_concept)            

    # insert the concept relations
    for relation in lookups.concept_relationships:
        sql = """
            INSERT INTO concepts.relations(conceptidfrom, conceptidto, relationtype)
            VALUES ('%s', '%s', '%s');
        """%(relation['source'], relation['target'], relation['type'])
        #print sql
        try:
            cursor.execute(sql)
        except Exception as e:
            errors.append('ERROR in row %s (%s):\n%s\n' % (relation['rownum'], str(e), traceback.format_exc()))
    
    if len(errors) > 0:
        errors.insert(0, 'ERRORS IN FILE: %s\n' % (filename))
        errors.append('\n\n\n\n')

    #print 'Time to parse = %s' % ("{0:.2f}".format(time() - start))    

    return errors
Example #14
0
from flask import stream_with_context

from werkzeug.exceptions import BadRequest
from itsdangerous import URLSafeSerializer

from model import Identifier, Publication
import os
import util

from collections import namedtuple, OrderedDict
import datetime

# the csv module has a 128 KB per-field limit, which is too small for our purposes
# raise it to 8 MB so it doesn't throw errors
import unicodecsv
unicodecsv.field_size_limit(8388608)


if 'CITACIE_DEBUG' in os.environ:
  app.debug = True

from local_settings import active_config
config = active_config(app)

serializer = URLSafeSerializer(config.secret)

import titlecase
def filter_titlecase(text, all_caps_only=False):
  if all_caps_only and not titlecase.ALL_CAPS.match(text):
    return text
  return titlecase.titlecase(text)
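Unlike most examples here, this one raises the limit to a fixed 8 MB rather than `sys.maxsize`, as the comment above explains. A small sketch of the same bounded approach with a named constant (the constant name is an assumption):

import unicodecsv

EIGHT_MB = 8 * 1024 * 1024  # 8388608, the value used above

# A bounded limit cannot overflow a 32-bit C long and still caps pathological
# rows, unlike an unconditional sys.maxsize.
unicodecsv.field_size_limit(EIGHT_MB)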
Example #15
0
#!/usr/bin/python
# coding=utf-8
from __future__ import print_function
import argparse
import os
import sys
import unicodecsv as csv
import unicodedata
import codecs
import chardet
import re

csv.field_size_limit(sys.maxsize)

# from graph_tool.all import *


# Parse command-line arguments
###########################################
def options():
    """Parse command line options.

    Args:

    Returns:
        argparse object.
    Raises:
        IOError: if input file does not exist.
    """

    parser = argparse.ArgumentParser(description='Create a coauthorship network from publications downloaded '
Example #16
0
if __name__ == '__main__':

    # maxInt = sys.maxsize

    # while True:
    # # decrease the maxInt value by factor 10
    # # as long as the OverflowError occurs.

    # try:
    # csv.field_size_limit(maxInt)
    # break
    # except OverflowError:
    # maxInt = int(maxInt/10)
    import ctypes
    csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))

    transformer = NLXTransformer(s3_prefix='open-skills-private/NLX_extracted',
                                 temp_file_path='/mnt/sqltransfer')
    logging.basicConfig(level=logging.INFO)

    #logging.info('max csv size set to {}'.format(maxInt))
    #for year in ('2003', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2016', '2017', '2018'):
    #for year in ('2003', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013'):
    #for year in ('2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013'):
    for year in range(2003, 2020):
        year = str(year)
        #stats_counter = DatasetStatsCounter(
        #    quarter=year,
        #    dataset_id='NLX'
        #)
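The `ctypes` one-liner above computes the largest value a C unsigned long can hold and halves it, yielding the largest limit `field_size_limit` will accept on that platform. A short sketch of why it is portable (a minimal illustration, not part of the original script):

import ctypes

# On most 64-bit Unix systems a C long is 64 bits, so this equals sys.maxsize.
# On Windows a C long is 32 bits, so this yields 2**31 - 1, which
# csv.field_size_limit() accepts without raising OverflowError.
portable_limit = int(ctypes.c_ulong(-1).value // 2)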
Example #17
0
# -*- coding: utf-8 -*-

import sys
from collections import namedtuple
import unicodecsv
from decimal import Decimal
from datetime import datetime
from progressbar import ProgressBar
import progressbar.widgets
from coaster.utils import getbool

from hascore import init_for
from hascore.models import db, GeoName, GeoCountryInfo, GeoAltName, GeoAdmin1Code, GeoAdmin2Code

unicodecsv.field_size_limit(sys.maxint)

CountryInfoRecord = namedtuple('CountryInfoRecord', [
    'iso_alpha2', 'iso_alpha3', 'iso_numeric', 'fips_code', 'title', 'capital',
    'area_in_sqkm', 'population', 'continent', 'tld', 'currency_code',
    'currency_name', 'phone', 'postal_code_format', 'postal_code_regex',
    'languages', 'geonameid', 'neighbours', 'equivalent_fips_code'
])

GeoNameRecord = namedtuple('GeoNameRecord', [
    'geonameid', 'title', 'ascii_title', 'alternatenames', 'latitude',
    'longitude', 'fclass', 'fcode', 'country_id', 'cc2', 'admin1', 'admin2',
    'admin3', 'admin4', 'population', 'elevation', 'dem', 'timezone', 'moddate'
])

GeoAdminRecord = namedtuple('GeoAdminRecord',
                            ['code', 'title', 'ascii_title', 'geonameid'])
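These namedtuples mirror the column layout of the GeoNames dump files, so a row read with `unicodecsv.reader` can be mapped onto one positionally. A minimal sketch, assuming the standard tab-delimited `countryInfo.txt` from GeoNames:

with open('countryInfo.txt', 'rb') as f:
    for row in unicodecsv.reader(f, delimiter='\t', encoding='utf-8'):
        if row[0].startswith('#'):
            continue  # the GeoNames dump begins with commented header lines
        record = CountryInfoRecord(*row)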
Example #18
0
import sys
from collections import namedtuple
from urlparse import urljoin
import zipfile
import unicodecsv
import requests
from decimal import Decimal
from datetime import datetime
from progressbar import ProgressBar
import progressbar.widgets
from coaster.utils import getbool

from hascore import init_for
from hascore.models import db, GeoName, GeoCountryInfo, GeoAltName, GeoAdmin1Code, GeoAdmin2Code


unicodecsv.field_size_limit(sys.maxint)


CountryInfoRecord = namedtuple('CountryInfoRecord', ['iso_alpha2', 'iso_alpha3', 'iso_numeric',
    'fips_code', 'title', 'capital', 'area_in_sqkm', 'population', 'continent', 'tld',
    'currency_code', 'currency_name', 'phone', 'postal_code_format', 'postal_code_regex',
    'languages', 'geonameid', 'neighbours', 'equivalent_fips_code'])


GeoNameRecord = namedtuple('GeoNameRecord', ['geonameid', 'title', 'ascii_title', 'alternatenames',
    'latitude', 'longitude', 'fclass', 'fcode', 'country_id', 'cc2', 'admin1', 'admin2',
    'admin3', 'admin4', 'population', 'elevation', 'dem', 'timezone', 'moddate'])


GeoAdminRecord = namedtuple('GeoAdminRecord', ['code', 'title', 'ascii_title', 'geonameid'])
Example #19
0
import sys

try:
    import ujson as json
except ImportError:
    import json  # noqa: F401
try:
    import parquet
except ImportError:
    parquet = False

if sys.version_info.major == 2:
    import unicodecsv as csv
else:
    import csv
    csv.field_size_limit(1000000000)
Example #20
0

import sys

try:
    import unicodecsv as csv
except ImportError:
    import csv

import zmq
from zmq.eventloop import ioloop
from zmq.eventloop.zmqstream import ZMQStream

from cs.eyrie.config import SOCKET_TYPES
from cs.eyrie.config import ZMQChannel


csv.field_size_limit(sys.maxint)


class Vassal(object):
    channels = {
        'control': ZMQChannel(
            endpoint='ipc:///tmp/eyrie_herald',
            socket_type=zmq.SUB,
        ),
    }
    title = '(eyrie:vassal)'
    app_name = 'eyrie'
    args = None
    cursor_factory = DictCursor

    def __init__(self, **kwargs):
Example #21
0
def import_versions_from_stream(csv_stream, columns, user_id):
    csv.field_size_limit(sys.maxsize)
    reader = csv.reader(csv_stream)
    rows = [row for row in reader]
    return _import_versions_from_csv(rows, columns, user_id)
Example #23
0
import os
import sys
from django.core.management.base import BaseCommand
from hits.models import Hit, HitTemplate
from unicodecsv import reader as UnicodeReader
#from util.unicodecsv import UnicodeReader
#from csv import reader as UnicodeReader

# The default field size limit is 131072 characters
import unicodecsv
unicodecsv.field_size_limit(sys.maxsize)


def get_or_create_template_from_html_file(htmlfile, template_file_path):
    template_file_path = os.path.abspath(template_file_path)
    name = template_file_path
    form = htmlfile.read().decode('utf-8')

    template, created = HitTemplate.objects.get_or_create(
        name=name,
        defaults={'form': form},
    )

    if created:
        template.save()

    return template


def parse_csv_file(fh):
    rows = UnicodeReader(fh)
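As the comment above notes, the stock limit is 131072 characters. `field_size_limit()` also returns the current limit, so it can be saved and restored around a bulk import; a minimal sketch (the 10 MB figure is arbitrary):

import csv

old_limit = csv.field_size_limit()      # 131072 unless something already raised it
csv.field_size_limit(10 * 1024 * 1024)  # temporarily allow fields up to 10 MB
try:
    pass  # ... parse the oversized CSV here ...
finally:
    csv.field_size_limit(old_limit)     # restore the previous limit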
Example #24
0
import unicodecsv

from calculadora_do_cidadao.rows.plugins.utils import (
    create_table,
    get_filename_and_fobj,
    ipartition,
    serialize,
)

sniffer = unicodecsv.Sniffer()
# Some CSV files have more than 128kB of data in a cell, so we force this value
# to be greater (16MB).
# TODO: check if it impacts in memory usage.
# TODO: may add option to change it by passing a parameter to import/export.
unicodecsv.field_size_limit(16777216)


def fix_dialect(dialect):
    if not dialect.doublequote and dialect.escapechar is None:
        dialect.doublequote = True

    if dialect.quoting == unicodecsv.QUOTE_MINIMAL and dialect.quotechar == "'":
        # Python csv's Sniffer seems to detect a wrong quotechar when
        # quoting is minimal
        dialect.quotechar = '"'


def discover_dialect(sample, encoding, delimiters=(",", ";", "\t", "|")):
    """Discover a CSV dialect based on a sample size.