Ejemplo n.º 1
0
def read_csv(args):
    """Parse the CSV file named by ``args.csvfile`` with a strict Unix dialect.

    The first row is handed to ``read_header`` together with ``active``
    (a name resolved outside this function); the reader, now positioned
    after the header, is then handed to ``read_rows``.
    """
    with open(args.csvfile, newline='') as handle:
        # strict=True makes the parser raise csv.Error on malformed input.
        rows = csv.reader(handle, dialect=csv.unix_dialect(), strict=True)
        header = next(rows)
        read_header(header, active)
        read_rows(rows)
Ejemplo n.º 2
0
    def compute_dialect(self):
        """Guess the CSV dialect of ``self.f`` and store it in ``self.dialect``.

        The first 4096 bytes of ``self.f`` are decoded with
        ``self.input_encoding`` and given to ``csv.Sniffer``; when sniffing
        fails, ``csv.unix_dialect`` is used instead.  The file is rewound
        afterwards.  A ``delimiter`` entry in ``self.request.form`` overrides
        the guessed delimiter.

        Raises ``BadRequest`` when the sample cannot be decoded, or when the
        delimiter is alphanumeric and none of the fallback characters is
        absent from ``self.content``.
        """
        try:
            head = self.f.read(4096)
            sample = head.decode(self.input_encoding)
        except (LookupError, UnicodeDecodeError):
            raise BadRequest('Unknown encoding {}'.format(self.input_encoding))

        sniffer = csv.Sniffer()
        try:
            guessed = sniffer.sniff(sample)
        except csv.Error:
            guessed = csv.unix_dialect()
        self.f.seek(0)

        # Escape double quotes with double quotes if needed.
        # See 2.7 in http://tools.ietf.org/html/rfc4180
        guessed.doublequote = True

        forced_delimiter = self.request.form.get('delimiter')
        if forced_delimiter:
            guessed.delimiter = forced_delimiter

        # See https://github.com/etalab/addok/issues/90#event-353675239
        # and http://bugs.python.org/issue2078:
        # one column files will end up with non-sense delimiters.
        if guessed.delimiter.isalnum():
            # We guess we are in a one column file, so pick the first
            # candidate character that does not occur in the file content.
            replacement = next(
                (char for char in '|~^°' if char not in self.content), None)
            if replacement is None:
                raise BadRequest(self.MISSING_DELIMITER_MSG)
            guessed.delimiter = replacement

        self.dialect = guessed
Ejemplo n.º 3
0
 def load(self, path):
     """Read the CSV file at *path* and return a ``csv.DictReader`` over it.

     The file is decoded with ``self.encoding`` and read entirely into
     memory.  The dialect is guessed with ``csv.Sniffer``; when sniffing
     fails, ``csv.unix_dialect`` is used instead.

     The text is handed to the reader through ``io.StringIO`` with
     ``newline=''`` so that line terminators are kept: the csv parser
     needs them to preserve newlines embedded in quoted fields, and
     ``str.splitlines()`` would additionally break rows on characters
     such as form feed or U+2028.
     """
     import io

     with codecs.open(path, 'r', encoding=self.encoding) as f:
         content = f.read()
     try:
         dialect = csv.Sniffer().sniff(content)
     except csv.Error:
         dialect = csv.unix_dialect()
     return csv.DictReader(io.StringIO(content, newline=''), dialect=dialect)
Ejemplo n.º 4
0
 def load(self, path):
     """Return a ``csv.DictReader`` over the CSV file at *path*.

     The whole file is read (decoded with ``self.encoding``) and its
     dialect is sniffed from that text, falling back to
     ``csv.unix_dialect`` when ``csv.Sniffer`` raises ``csv.Error``.

     Rows are parsed from an in-memory ``io.StringIO`` opened with
     ``newline=''`` rather than from ``content.splitlines()``: dropping
     the line terminators makes the csv parser lose newlines that sit
     inside quoted fields, and ``splitlines()`` also splits on
     separators (form feed, U+2028, ...) that are not CSV row breaks.
     """
     import io

     with codecs.open(path, 'r', encoding=self.encoding) as f:
         content = f.read()
     try:
         dialect = csv.Sniffer().sniff(content)
     except csv.Error:
         dialect = csv.unix_dialect()
     return csv.DictReader(io.StringIO(content, newline=''), dialect=dialect)
Ejemplo n.º 5
0
 def load(self, path):
     """Read the CSV file at *path* and return a ``csv.DictReader`` over it.

     The file is read once; the dialect is sniffed from the first 4096
     characters and defaults to ``csv.unix_dialect`` when ``csv.Sniffer``
     cannot determine one.

     The reader consumes the text through ``io.StringIO`` so that line
     terminators are kept.  Feeding it ``content.splitlines()`` would
     drop newlines embedded in quoted fields and would also split rows
     on characters such as form feed or U+2028.
     """
     import io

     with open(path, 'r') as f:
         content = f.read()
     try:
         # Only a prefix is sniffed, to bound the cost on large files.
         dialect = csv.Sniffer().sniff(content[:4096])
     except csv.Error:
         dialect = csv.unix_dialect()
     return csv.DictReader(io.StringIO(content, newline=''), dialect=dialect)
Ejemplo n.º 6
0
 def load(self, path):
     """Return a ``csv.DictReader`` over the CSV file at *path*.

     The file content is read in one go.  Its first 4096 characters are
     given to ``csv.Sniffer`` to guess the dialect; ``csv.unix_dialect``
     is used when the sniffer raises ``csv.Error``.

     Parsing goes through ``io.StringIO(content, newline='')`` instead
     of ``content.splitlines()``, because stripping line terminators
     makes the csv parser silently drop newlines inside quoted fields
     (and ``splitlines()`` breaks on more characters than CSV does).
     """
     import io

     with open(path, 'r') as f:
         content = f.read()
     try:
         dialect = csv.Sniffer().sniff(content[:4096])
     except csv.Error:
         dialect = csv.unix_dialect()
     return csv.DictReader(io.StringIO(content, newline=''),
                           dialect=dialect)
Ejemplo n.º 7
0
 def from_csv(cls, csv_path):
     """Build an instance from the CSV file at *csv_path*.

     Every row of the file is loaded as a dict (``csv.DictReader``) and
     the resulting list is passed to ``cls`` together with the directory
     containing the file.  The dialect is sniffed from the file content,
     with ``csv.unix_dialect`` as the fallback.

     The content is parsed through ``io.StringIO`` with ``newline=''``
     so that newlines embedded in quoted fields survive;
     ``content.splitlines()`` would strip them and would also split
     rows on characters such as form feed or U+2028.
     """
     import io

     working_dir = op.dirname(csv_path)
     with codecs.open(csv_path, 'r') as f:
         content = f.read()
     try:
         dialect = csv.Sniffer().sniff(content)
     except csv.Error:
         dialect = csv.unix_dialect()
     medias = list(csv.DictReader(io.StringIO(content, newline=''),
                                  dialect=dialect))
     return cls(working_dir, medias)
Ejemplo n.º 8
0
def load_csv(path, encoding='utf-8'):
    """Load the CSV file at *path* and return a ``csv.DictReader`` over it.

    ``abort`` is called with an explanatory message when *path* does not
    exist.  The file is decoded with *encoding* and read once; the dialect
    is sniffed from the first 4096 characters, with ``csv.unix_dialect`` as
    the fallback when ``csv.Sniffer`` raises ``csv.Error``.

    The text is parsed through ``io.StringIO`` with ``newline=''`` so that
    line terminators are kept: ``content.splitlines()`` would drop newlines
    embedded in quoted fields and split rows on characters such as form
    feed or U+2028.
    """
    import io

    path = Path(path)
    if not path.exists():
        abort('Path does not exist: {}'.format(path))
    with path.open(encoding=encoding) as f:
        content = f.read()
    try:
        dialect = csv.Sniffer().sniff(content[:4096])
    except csv.Error:
        dialect = csv.unix_dialect()
    return csv.DictReader(io.StringIO(content, newline=''), dialect=dialect)
Ejemplo n.º 9
0
def load_csv(path_or_file, encoding='utf-8'):
    """Load CSV data from a path or an open text file into a ``DictReader``.

    When *path_or_file* is a ``str`` or ``Path`` it is opened with
    *encoding* (``abort`` is called if it does not exist); otherwise it is
    used as an already opened, seekable text file.  The dialect is sniffed
    from the first 4096 characters read, with ``csv.unix_dialect`` as the
    fallback, then the file is rewound and read entirely.

    The file object is always closed before returning, including when
    reading fails and including file objects supplied by the caller.

    The text is parsed through ``io.StringIO`` with ``newline=''`` so that
    newlines embedded in quoted fields are preserved;
    ``content.splitlines()`` would strip them and also split rows on
    characters such as form feed or U+2028.
    """
    import io

    if isinstance(path_or_file, (str, Path)):
        path = Path(path_or_file)
        if not path.exists():
            abort('Path does not exist: {}'.format(path))
        path_or_file = path.open(encoding=encoding)
    try:
        extract = path_or_file.read(4096)
        try:
            dialect = csv.Sniffer().sniff(extract)
        except csv.Error:
            dialect = csv.unix_dialect()
        path_or_file.seek(0)
        content = path_or_file.read()
    finally:
        # Do not leak the handle if reading or seeking raises.
        path_or_file.close()
    return csv.DictReader(io.StringIO(content, newline=''), dialect=dialect)
Ejemplo n.º 10
0
def load_csv(path_or_file, encoding='utf-8'):
    """Return a ``csv.DictReader`` over CSV data given as a path or a file.

    A ``str`` or ``Path`` argument is opened with *encoding*; ``abort`` is
    called when the path does not exist.  Any other argument is treated as
    an open, seekable text file.  The first 4096 characters are given to
    ``csv.Sniffer`` to guess the dialect (``csv.unix_dialect`` when that
    fails), then the file is rewound and read entirely.

    The file object is closed in every case, whether reading succeeds or
    raises, and whether it was opened here or passed in by the caller.

    Rows are parsed from ``io.StringIO(content, newline='')`` rather than
    from ``content.splitlines()``, which would discard newlines embedded in
    quoted fields and split rows on non-CSV separators such as form feed.
    """
    import io

    if isinstance(path_or_file, (str, Path)):
        path = Path(path_or_file)
        if not path.exists():
            abort('Path does not exist: {}'.format(path))
        path_or_file = path.open(encoding=encoding)
    try:
        extract = path_or_file.read(4096)
        try:
            dialect = csv.Sniffer().sniff(extract)
        except csv.Error:
            dialect = csv.unix_dialect()
        path_or_file.seek(0)
        content = path_or_file.read()
    finally:
        # Release the handle even when reading or seeking fails.
        path_or_file.close()
    return csv.DictReader(io.StringIO(content, newline=''), dialect=dialect)
    def split(self, outputdir, no_of_parts=None):
        """Split the CSV file ``self.file_or_dir`` into numbered part files.

        Parts are written to *outputdir* as
        ``<basename>_part_<NNN>.csv`` by ``self._write_part_to_file``,
        which is called repeatedly with the shared reader until it returns
        a true value (taken here as "end of file reached").

        Args:
            outputdir: directory in which the part files are created.
            no_of_parts: desired number of parts.  When omitted (or 0) it
                is estimated from the file size, assuming about 2 MB per
                part.

        Raises:
            ValueError: if ``self.file_or_dir`` is not an existing file.

        The number of lines per part is only an estimate (about 1 KB per
        line is assumed), so the actual number of parts may differ from
        *no_of_parts*.
        """
        if not os.path.isfile(self.file_or_dir):
            raise ValueError(
                "The constructor argument file_or_dir {} must be a file to invoke this operation."
                .format(self.file_or_dir))

        file_size = os.path.getsize(self.file_or_dir)

        # Size heuristics: ~1 KB per line, ~2 MB per part.
        approx_size_of_each_line = 1024
        target_part_size = 2 * 1024 * 1024
        approx_total_lines = file_size / approx_size_of_each_line

        no_of_parts = no_of_parts or int(file_size / target_part_size) + 1
        no_lines_per_part = int(approx_total_lines / no_of_parts) + 1

        self._logger.info("Dividing file {} into estimated {} parts".format(
            self.file_or_dir, no_of_parts))

        # newline='' lets the csv module do its own newline handling, so
        # line breaks embedded in quoted fields reach the reader unchanged.
        with open(self.file_or_dir, encoding=self.encoding,
                  newline='') as handle:
            # Output dialect handed to the part writer.
            dialect = csv.unix_dialect()
            # TODO: For some reason sniff doesnt pickup quote all.., hence hardcoded ..
            dialect.quoting = csv.QUOTE_ALL

            csv_reader = csv.reader(handle,
                                    delimiter=self.delimiter,
                                    quotechar=self.quote_character)
            # Consume the header row so it can be repeated in every part.
            header = None
            if self.has_header:
                header = next(csv_reader)

            part_index = 0
            end_of_file = False
            while not end_of_file:
                part_index += 1
                part_name = "{}_part_{:03d}.csv".format(
                    os.path.basename(self.file_or_dir), part_index)
                output_part = os.path.join(outputdir, part_name)
                end_of_file = self._write_part_to_file(csv_reader, output_part,
                                                       no_lines_per_part,
                                                       header, dialect)

        self._logger.info("Completed dividing files sucessfully")
Ejemplo n.º 12
0
    def compute_dialect(self, req, file_, content, encoding):
        """Guess and return the CSV dialect of an uploaded file.

        The first 4096 bytes of ``file_.file`` are decoded with *encoding*
        and given to ``csv.Sniffer``; ``csv.unix_dialect`` is used when
        sniffing fails.  The file is rewound afterwards.  The request
        parameters ``delimiter`` and ``quote`` override the guessed
        delimiter and quote character.

        Raises ``falcon.HTTPBadRequest`` when the sample cannot be decoded,
        or when the delimiter is alphanumeric and every fallback character
        occurs in *content*.
        """
        try:
            head = file_.file.read(4096)
            sample = head.decode(encoding)
        except (LookupError, UnicodeDecodeError) as e:
            msg = 'Unable to decode with encoding "{}"'.format(encoding)
            raise falcon.HTTPBadRequest(msg, str(e))

        sniffer = csv.Sniffer()
        try:
            guessed = sniffer.sniff(sample)
        except csv.Error:
            guessed = csv.unix_dialect()
        file_.file.seek(0)

        # Escape double quotes with double quotes if needed.
        # See 2.7 in http://tools.ietf.org/html/rfc4180
        guessed.doublequote = True

        # Explicit request parameters win over whatever was sniffed.
        overrides = (('delimiter', 'delimiter'), ('quotechar', 'quote'))
        for attribute, param in overrides:
            value = req.get_param(param)
            if value:
                setattr(guessed, attribute, value)

        # See https://github.com/etalab/addok/issues/90#event-353675239
        # and http://bugs.python.org/issue2078:
        # one column files will end up with non-sense delimiters.
        if guessed.delimiter.isalnum():
            # We guess we are in a one column file, so pick the first
            # candidate character that does not occur in the file content.
            replacement = next(
                (char for char in '|~^°' if char not in content), None)
            if replacement is None:
                raise falcon.HTTPBadRequest(self.MISSING_DELIMITER_MSG,
                                            self.MISSING_DELIMITER_MSG)
            guessed.delimiter = replacement

        return guessed
Ejemplo n.º 13
0
    def __init__(self, csvfn=None):
        '''MAPPING - Python access to the SBEM-uCT-VSD mapping

        map = MAPPING(csvfn) loads the mapping from the CSV file CSVFN.
        map = MAPPING() loads "../data/mapping.csv", located relative to
        the directory of this module.

        The result has dict attributes named <from>2<to>:
          roi2can, roi2uct, roi2sbem, roi2roiid - keyed by ROI number
          roiid2roi - keyed by ROI id string
          sbem2can, sbem2uct, sbem2roi, sbem2roiid, sbem2tname - keyed by
            SBEM ID
          uct2can, uct2roi, uct2sbem - keyed by uCT ID number
          can2sbem, can2uct, can2roi - keyed by the "can" column
        A row only contributes to the tables whose key is present in it.'''

        self.roi2can = {}
        self.roi2uct = {}
        self.roi2sbem = {}
        self.sbem2can = {}
        self.sbem2uct = {}
        self.sbem2roi = {}
        self.sbem2roiid = {}
        self.sbem2tname = {}
        self.uct2can = {}
        self.uct2roi = {}
        self.uct2sbem = {}
        self.can2sbem = {}
        self.can2roi = {}
        self.can2uct = {}
        self.roiid2roi = {}
        self.roi2roiid = {}

        if csvfn is None:
            csvfn = os.path.dirname(__file__) + '/../data/mapping.csv'

        with open(csvfn) as f:
            rows = list(csv.reader(f, csv.unix_dialect()))
        del rows[0]  # the first row is the header

        for fields in rows:
            # Columns used: 0 roi, 1 roiid, 2 can, 3 uct, 5 sbem, 6 tname.
            roi = self.convert_to_number(fields[0])
            roiid, can = fields[1], fields[2]
            uct = self.convert_to_number(fields[3])
            sbem = self.convert_to_number(fields[5])
            tname = fields[6]

            # Empty cells mean "no value"; a row without a ROI id is
            # treated as having no ROI at all.
            can = None if can == '' else can
            if roiid == '':
                roiid = roi = None

            if roi is not None:
                self.roi2can[roi] = can
                self.roi2uct[roi] = uct
                self.roi2sbem[roi] = sbem
                self.roiid2roi[roiid] = roi
                self.roi2roiid[roi] = roiid

            if sbem is not None:
                self.sbem2can[sbem] = can
                self.sbem2uct[sbem] = uct
                self.sbem2roi[sbem] = roi
                self.sbem2roiid[sbem] = roiid
                self.sbem2tname[sbem] = tname

            if uct is not None:
                self.uct2can[uct] = can
                self.uct2sbem[uct] = sbem
                self.uct2roi[uct] = roi

            if can is None:
                continue
            # A repeated "can" value is reported, then the later row wins.
            if can in self.can2roi:
                print(f'Duplicate can: {can}')
            self.can2sbem[can] = sbem
            self.can2uct[can] = uct
            self.can2roi[can] = roi
Ejemplo n.º 14
0
    def __init__(self, csvfn=None):
        '''CONFIDENCE - Python access to the SBEM-uCT-VSD confidence file
        conf = CONFIDENCE(csvfn) opens the given confidence file.
        conf = CONFIDENCE() uses "../data/confidence.csv", located relative
        to the directory of this module.
        Result has several fields that each are dicts with tree IDs as keys:
           uctid (numeric, or None)
           vsdid (letters, or None)
           canoid
           sbemconf (0-100) - confidence of tracing; 100 is assumed (with a
             printed caution) when the cell is empty
           vsductconf (0-100) - confidence of vsd to uct mapping, or None
           gmapconf (0-100) - confidence of gmapimg results, or None
        plus vsd2tree, which maps vsdid back to the tree ID.

        The first column must look like "TREE", "TREE (UCT)" or
        "TREE (UCT-vsd)"; rows that do not match are ignored.  A row that
        matches but holds a non-numeric confidence raises, after printing
        which row was at fault.'''
        self.uctid = {}
        self.vsdid = {}
        self.canoid = {}
        self.sbemconf = {}
        self.vsductconf = {}
        self.gmapconf = {}
        self.vsd2tree = {}

        if csvfn is None:
            here = os.path.dirname(__file__)
            csvfn = here + '/../data/confidence.csv'
        with open(csvfn) as f:
            rows = list(csv.reader(f, csv.unix_dialect()))
        rows.pop(0)  # discard the header row

        # Raw string: "\d", "\s" and "\(" are invalid escapes in a normal
        # string literal and trigger a SyntaxWarning on recent Pythons.
        id_pattern = re.compile(r'^(\d+)\s*(\((\d+)(-([a-z]+))?\))?$')
        for row in rows:
            ids = row[0]
            cano = row[1]
            sbemc = row[4]
            vumapc = row[5]
            gmapc = row[6]
            m = id_pattern.match(ids)
            if not m:
                continue  # not a tree row
            try:
                tid = int(m.group(1))
                uctid = m.group(3)
                if uctid is not None:
                    uctid = int(uctid)
                vsdid = m.group(5)

                self.uctid[tid] = uctid
                self.vsdid[tid] = vsdid
                self.vsd2tree[vsdid] = tid
                self.canoid[tid] = cano
                if sbemc == '':
                    self.sbemconf[tid] = 100
                    print('Caution: no sbemconf for', ids,
                          '- assuming 100%')
                else:
                    self.sbemconf[tid] = int(sbemc)
                if vumapc in ('-', ''):
                    self.vsductconf[tid] = None
                else:
                    self.vsductconf[tid] = int(vumapc)
                if gmapc in ('-', '', '?'):
                    self.gmapconf[tid] = None
                else:
                    self.gmapconf[tid] = int(gmapc)
            except Exception:
                # Say which row is at fault, then let the error propagate.
                print('Something wrong at', ids)
                raise
Ejemplo n.º 15
0
from simplecrypt import decrypt, encrypt
from collections import namedtuple
from io import StringIO
from os import rename
from getpass import getpass
import csv


#TODO: use logging
def debug(*args, **kwargs):
    """Print *args* prefixed with ``DEBUG:``.

    Accepts the same keyword arguments as ``print``.  Output goes to
    standard error unless the caller passes an explicit ``file``.
    Returns ``None`` (the result of ``print``).
    """
    # Imported here because this module does not import ``stderr`` at the
    # top; looking it up at call time also honours redirection of
    # ``sys.stderr``.
    import sys

    kwargs.setdefault('file', sys.stderr)
    return print('DEBUG:', *args, **kwargs)


# CSV dialect of the decrypted database text: loadDatabase splits the text
# on its line terminator and parses the resulting lines with it.
DIALECT = csv.unix_dialect()
# Codec used by loadDatabase to decode the decrypted bytes into text.
ENCODING = 'ascii'
# Container for a loaded database; loadDatabase fills ``properties`` with
# the tuple of names taken from the first CSV row.
Database = namedtuple('Database', ['properties', 'Record', 'items'])


def loadDatabase(file_name, password):
    #TODO: catch
    with open(file_name, 'rb') as f:
        data = decrypt(password, f.read()).decode(ENCODING)

    data = data.rstrip().split(DIALECT.lineterminator)
    reader = csv.reader(data, dialect=DIALECT)
    props = tuple(next(reader))
    Record = namedtuple('Record', props)
    database = Database(
            properties=props,