Code Example #1
File: tests.py Project: cclauss/backports.csv
    def test_quote_none_quotechar_undefined(self):
        """A QUOTE_NONE dialect should not error if quotechar is undefined."""
        class CustomDialect(csv.Dialect):
            delimiter = '\t'
            skipinitialspace = False
            lineterminator = '\n'
            quoting = csv.QUOTE_NONE

        csv.writer(io.StringIO(), CustomDialect)
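Note: the dialect above never quotes anything, which is why an undefined quotechar is tolerated. As a minimal standalone sketch (not taken from the project above), a QUOTE_NONE writer only copes with a field containing the delimiter if an escapechar is supplied:

import csv
import io

buf = io.StringIO()
writer = csv.writer(buf, quoting=csv.QUOTE_NONE, escapechar='\\')
writer.writerow(['a,b', 'c'])  # without escapechar this would raise csv.Error
print(buf.getvalue())          # a\,b,c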
Code Example #2
    def fetch(self):
      fd, tmp_file = tempfile.mkstemp()
      os.close(fd)  # the path is reopened by name below; release the raw descriptor

      pip = PointInPolygon(self.polygon_id, 60)

      traffic_signs = []
      reader = json.loads(open(self.mapping, 'r').read())
      try:
        for row in reader:
          traffic_signs += row['object']
      except Exception:
        self.logger.err(row)
        raise

      with open(tmp_file, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['accuracy', 'direction', 'image_key', 'first_seen_at', 'last_seen_at', 'value', 'X', 'Y'])

      slice = lambda A, n: [A[i:i+n] for i in range(0, len(A), n)]

      bboxes = pip.bboxes()

      start_time = (datetime.today() - timedelta(days=365*2)).isoformat()[0:10]
      b = 0
      for traffic_signs_ in slice(traffic_signs, 10):
        b = b + 1
        self.logger.log('Batch {0}/{1}: {2}'.format(b, round(len(traffic_signs) / 10 + 0.5), ','.join(traffic_signs_)))
        for bbox in bboxes:
          url = 'https://a.mapillary.com/v3/map_features?bbox={bbox}&client_id={client_id}&layers={layer}&per_page=1000&start_time={start_time}&values={values}'.format(bbox=','.join(map(str, bbox)), layer=self.layer, client_id='MEpmMTFQclBTUWlacjV6RTUxWWMtZzo5OTc2NjY2MmRiMDUwYmMw', start_time=start_time, values=','.join(traffic_signs_))
          print(url)
          with open(tmp_file, 'a') as csvfile:
            writer = csv.writer(csvfile)

            r = None
            page = 0
            while url:
              page = page + 1
              self.logger.log("Page {0}".format(page))
              r = downloader.get(url)
              url = r.links['next']['url'] if 'next' in r.links else None

              features = r.json()['features']
              filtered = 0
              self.logger.log('{0} features fetched'.format(len(features)))
              for j in features:
                p = j['properties']
                image_key = p['detections'][0]['image_key']
                gc = j['geometry']['coordinates']
                row = [p['accuracy'], p['direction'] if 'direction' in p else None, image_key, p['first_seen_at'], p['last_seen_at'], p['value']] + gc
                if row[0] > 0.01 and pip.point_inside_polygon(gc[0], gc[1]):
                  writer.writerow(row)
                  filtered = filtered + 1
              self.logger.log('{0} kept'.format(filtered))

      return tmp_file
Code Example #3
    def fetch(self, url, tmp_file, date_string=None):
      pip = PointInPolygon(self.polygon_id, 60)

      traffic_signs = []
      reader = json.loads(open(self.mapping, 'r').read())
      try:
        for row in reader:
          traffic_signs += row['object']
      except Exception:
        self.logger.err(row)
        raise

      with open(tmp_file, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['accuracy', 'direction', 'image_key', 'first_seen_at', 'last_seen_at', 'value', 'X', 'Y'])

      slice = lambda A, n: [A[i:i+n] for i in range(0, len(A), n)]

      bboxes = pip.bboxes()

      start_time = (datetime.today() - timedelta(days=365*2)).isoformat()[0:10]
      b = 0
      for traffic_signs_ in slice(traffic_signs, 10):
        b = b + 1
        self.logger.log('Batch {0}/{1}: {2}'.format(b, ceil(len(traffic_signs) / 10.0), ','.join(traffic_signs_)))
        for bbox in bboxes:
          url = 'https://a.mapillary.com/v3/map_features?bbox={bbox}&client_id={client_id}&layers={layer}&per_page=1000&start_time={start_time}&values={values}'.format(bbox=','.join(map(str, bbox)), layer=self.layer, client_id='MEpmMTFQclBTUWlacjV6RTUxWWMtZzo5OTc2NjY2MmRiMDUwYmMw', start_time=start_time, values=','.join(traffic_signs_))
          self.logger.log(url)
          with open(tmp_file, 'a') as csvfile:
            writer = csv.writer(csvfile)

            r = None
            page = 0
            while url:
              page = page + 1
              self.logger.log("Page {0}".format(page))
              r = downloader.get(url)
              url = r.links['next']['url'] if 'next' in r.links else None

              features = r.json()['features']
              filtered = 0
              self.logger.log('{0} features fetched'.format(len(features)))
              for j in features:
                p = j['properties']
                image_key = p['detections'][0]['image_key']
                gc = j['geometry']['coordinates']
                row = [p['accuracy'], p['direction'] if 'direction' in p else None, image_key, p['first_seen_at'], p['last_seen_at'], p['value']] + gc
                if row[0] > 0.01 and pip.point_inside_polygon(gc[0], gc[1]):
                  writer.writerow(row)
                  filtered = filtered + 1
              self.logger.log('{0} kept'.format(filtered))

      return True
Code Example #4
File: tests.py Project: cclauss/backports.csv
    def test_quote_all_quotechar_unset(self):
        """A QUOTE_ALL dialect should error if quotechar is unset."""
        class CustomDialect(csv.Dialect):
            delimiter = '\t'
            skipinitialspace = False
            lineterminator = '\n'
            quoting = csv.QUOTE_ALL

        with self.assertRaises(TypeError) as cx:
            csv.writer(io.StringIO(), CustomDialect)
        assert cx.exception.args[
            0] == 'quotechar must be set if quoting enabled'
Code Example #5
File: tests.py Project: gitter-badger/backports.csv
    def test_writerows(self):
        class BrokenFile:
            def write(self, buf):
                raise OSError
        writer = csv.writer(BrokenFile())
        self.assertRaises(OSError, writer.writerows, [['a']])

        with TemporaryFile("w+", newline='') as fileobj:
            writer = csv.writer(fileobj)
            self.assertRaises(TypeError, writer.writerows, None)
            writer.writerows([['a','b'],['c','d']])
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), "a,b\r\nc,d\r\n")
Code Example #6
    def test_writerows(self):
        class BrokenFile:
            def write(self, buf):
                raise OSError

        writer = csv.writer(BrokenFile())
        self.assertRaises(OSError, writer.writerows, [['a']])

        with TemporaryFile("w+", newline='') as fileobj:
            writer = csv.writer(fileobj)
            self.assertRaises(TypeError, writer.writerows, None)
            writer.writerows([['a', 'b'], ['c', 'd']])
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), "a,b\r\nc,d\r\n")
Code Example #7
File: changes.py Project: dekoza/weblate
    def get(self, request, *args, **kwargs):
        object_list = self.get_queryset()[:2000]

        # Do reasonable ACL check for global
        acl_obj = self.translation or self.component or self.project
        if not acl_obj:
            for change in object_list:
                if change.component:
                    acl_obj = change.component
                    break

        if not request.user.has_perm('change.download', acl_obj):
            raise PermissionDenied()

        # Always output in english
        activate('en')

        response = HttpResponse(content_type='text/csv; charset=utf-8')
        response['Content-Disposition'] = 'attachment; filename=changes.csv'

        writer = csv.writer(response)

        # Add header
        writer.writerow(('timestamp', 'action', 'user', 'url', 'target'))

        for change in object_list:
            writer.writerow((
                change.timestamp.isoformat(),
                change.get_action_display(),
                change.user.username if change.user else '',
                get_site_url(change.get_absolute_url()),
                change.target,
            ))

        return response
Code Example #8
    def dumpCSV(self, sql, ext, head, callback):
        self.giscurs.execute(sql)
        row = []
        column = {}
        while True:
            many = self.giscurs.fetchmany(1000)
            if not many:
                break
            for res in many:
                row.append(res)
                for k in res['tags'].keys():
                    if k not in column:
                        column[k] = 1
                    else:
                        column[k] += 1
        column = sorted(column, key=column.get, reverse=True)
        column = list(filter(lambda a: a != self.mapping.osmRef and a not in self.mapping.select.tags[0], column))
        column = [self.mapping.osmRef] + list(self.mapping.select.tags[0].keys()) + column
        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator=u'\n')
        writer.writerow(head + column)
        for r in row:
            cc = []
            for c in column:
                tags = r['tags']
                if c in tags:
                    cc.append(tags[c])
                else:
                    cc.append(None)
            writer.writerow(callback(r, cc))

        with bz2.BZ2File(u"%s/%s-%s%s.csv.bz2" % (self.config.dst_dir, self.name, self.__class__.__name__, ext), mode='w') as csv_bz2_file:
            csv_bz2_file.write(buffer.getvalue().encode('utf-8'))
Code Example #9
 def _write_test(self, fields, expect, **kwargs):
     with TemporaryFile("w+", newline='') as fileobj:
         writer = csv.writer(fileobj, **kwargs)
         writer.writerow(fields)
         fileobj.seek(0)
         self.assertEqual(fileobj.read(),
                          expect + writer.dialect.lineterminator)
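The helper above asserts that writing a single row yields the expected text plus the dialect's line terminator. A self-contained sketch of the same check, with invented field values:

import csv
import io

fileobj = io.StringIO()
writer = csv.writer(fileobj)
writer.writerow(['a,b', 'c'])  # the embedded delimiter forces quoting
fileobj.seek(0)
assert fileobj.read() == '"a,b",c' + writer.dialect.lineterminator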
Code Example #10
def store_to_csv(store_infos):
    f = open('./store.csv', 'a', encoding='euc-kr')
    csvWriter = csv.writer(f)

    for e in store_infos:
        temp_name = None
        temp_branch = None
        temp_address = None
        temp_phone_num = None
        if type(e.getName()) is not str:
            temp_name = e.getName().decode('euc-kr')
        else:
            temp_name = e.getName()
        if type(e.getBranch()) is not str:
            temp_branch = e.getBranch().decode('euc-kr')
        else:
            temp_branch = e.getBranch()
        if type(e.getAddress()) is not str:
            temp_address = e.getAddress().decode('euc-kr')
        else:
            temp_address = e.getAddress()
        if type(e.getPhoneNum()) is not str:
            temp_phone_num = e.getPhoneNum().decode('euc-kr')
        else:
            temp_phone_num = e.getPhoneNum()

        csvWriter.writerow(
            [temp_name, temp_branch, temp_address, temp_phone_num])

    f.close()
Code Example #11
def save_file_set(output_filename, delimiter, header_row, set_data_rows):
    mime_type = 'text/tsv' if delimiter == '\t' else 'text/csv'
    with open_file(output_filename, 'w', mime_type=mime_type) as f:
        writer = csv.writer(f, delimiter=text_type(delimiter))
        if header_row:
            write_csv_rows(writer, [header_row])
        write_csv_rows(writer, set_data_rows)
Code Example #12
File: pipelines.py Project: JonasGroeger/scrariva
    def process_item(self, item, spider):
        isin = spider.isin

        reader = csv.reader(io.StringIO(item['csv']), delimiter=';')
        result = []

        next(reader)  # Skip header

        for line in reader:
            if not line:
                continue

            day = line[0]
            value = line[4]
            result.append((day, value))

        min_time = result[0][0]
        max_time = result[-1][0]

        output_filename = self.save_file.format(isin, min_time, max_time)

        with io.open(output_filename, 'w', newline='',
                     encoding='UTF-8') as output:
            writer = csv.writer(output, delimiter=';')
            writer.writerow(['Datum', 'Kurs'])
            for t in result:
                writer.writerow([t[0], t[1]])
Code Example #13
    def run(self):
        with io.open(self.output_file, 'w', newline='', encoding='utf8') as fh:
            w = csv.writer(fh)

            self._get_column_names()
            w.writerow(self.columns)

            with click.progressbar(self.data['days'],
                                   label='Exporting to CSV file...',
                                   fill_char=click.style('#',
                                                         fg='blue')) as days:
                for day in days:
                    data = [
                        day['date'],
                        day['grand_total']['total_seconds'],
                    ]
                    self._add_data_for_columns(data, self.projects,
                                               day['projects'])
                    self._add_data_for_columns(data, self.entities,
                                               day['entities'])
                    self._add_data_for_columns(data, self.languages,
                                               day['languages'])
                    self._add_data_for_columns(data, self.editors,
                                               day['editors'])
                    self._add_data_for_columns(data, self.operating_systems,
                                               day['operating_systems'])
                    w.writerow(data)
Code Example #14
    def write_csv(self, f):
        headings = [
            'comment',
            'is_helpful',
            'page',
            'referrer',
            'submitted_on',
            'language'
        ]

        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        writer.writerow([field for field in headings])

        for feedback in self:
            # For legacy compatibility purposes, generated CSVs should contain
            # only the date feedback was submitted, and not the complete
            # timestamp. Timestamps are stored in the database as UTC, but
            # we want them to be exported in the Django default timezone
            # specified in settings.TIME_ZONE, which is America/New_York.
            feedback.submitted_on = \
                localdate(feedback.submitted_on).strftime('%Y-%m-%d')

            writer.writerow([
                force_str(getattr(feedback, heading), strings_only=True)
                for heading in headings
            ])
Code Example #15
def dontruntest_big_csvs():
    cols = [
        Column(name='name', columnType='STRING', maximumSize=1000),
        Column(name='foo',
               columnType='STRING',
               enumValues=['foo', 'bar', 'bat']),
        Column(name='x', columnType='DOUBLE'),
        Column(name='n', columnType='INTEGER'),
        Column(name='is_bogus', columnType='BOOLEAN')
    ]

    schema1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    # write rows to CSV file
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        schedule_for_cleanup(temp.name)
        filename = temp.name

    with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
        writer = csv.writer(temp,
                            quoting=csv.QUOTE_NONNUMERIC,
                            lineterminator=str(os.linesep))
        writer.writerow([col.name for col in cols])

        for i in range(10):
            for j in range(100):
                foo = cols[1].enumValues[random.randint(0, 2)]
                writer.writerow(
                    ('Robot ' + str(i * 100 + j), foo, random.random() * 200.0,
                     random.randint(0, 100), random.random() >= 0.5))
    # upload CSV
    syn._uploadCsv(filepath=temp.name, schema=schema1)

    from synapseclient.table import CsvFileTable
    CsvFileTable.from_table_query(syn, "select * from %s" % schema1.id)
Code Example #16
File: tests.py Project: cclauss/backports.csv
 def _write_error_test(self, exc, fields, **kwargs):
     with TemporaryFile("w+", newline='') as fileobj:
         writer = csv.writer(fileobj, **kwargs)
         with self.assertRaises(exc):
             writer.writerow(fields)
         fileobj.seek(0)
         self.assertEqual(fileobj.read(), '')
Code Example #17
File: csvhelpers.py Project: rlugojr/csvdedupe
def writeUniqueResults(clustered_dupes, input_file, output_file):

    # Write our original data back out to a CSV with a new column called 
    # 'Cluster ID' which indicates which records refer to each other.

    logging.info('saving unique results to: %s' % output_file)

    cluster_membership = {}
    for cluster_id, (cluster, score) in enumerate(clustered_dupes):
        for record_id in cluster:
            cluster_membership[record_id] = cluster_id

    unique_record_id = cluster_id + 1

    writer = csv.writer(output_file)

    reader = csv.reader(StringIO(input_file))

    heading_row = next(reader)
    heading_row.insert(0, u'Cluster ID')
    writer.writerow(heading_row)

    seen_clusters = set()
    for row_id, row in enumerate(reader):
        if row_id in cluster_membership:
            cluster_id = cluster_membership[row_id]
            if cluster_id not in seen_clusters:
                row.insert(0, cluster_id)
                writer.writerow(row)
                seen_clusters.add(cluster_id)
        else:
            cluster_id = unique_record_id
            unique_record_id += 1
            row.insert(0, cluster_id)
            writer.writerow(row)
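A hypothetical invocation of the helper above, with invented data (it assumes writeUniqueResults and its StringIO import are in scope):

import io

clustered_dupes = [((0, 2), 0.9)]            # rows 0 and 2 were judged duplicates
input_file = 'name\nacme\nfoo\nacme inc\n'   # raw CSV text, as the function expects
output_file = io.StringIO()
writeUniqueResults(clustered_dupes, input_file, output_file)
print(output_file.getvalue())                # header plus one row per unique record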
Code Example #18
File: tests.py Project: gitter-badger/backports.csv
 def _write_test(self, fields, expect, **kwargs):
     with TemporaryFile("w+", newline='') as fileobj:
         writer = csv.writer(fileobj, **kwargs)
         writer.writerow(fields)
         fileobj.seek(0)
         self.assertEqual(fileobj.read(),
                          expect + writer.dialect.lineterminator)
Code Example #19
def writeResults(clustered_dupes, input_file, output_file):

    # Write our original data back out to a CSV with a new column called
    # 'Cluster ID' which indicates which records refer to each other.

    logging.info('saving results to: %s' % output_file)

    cluster_membership = {}
    for cluster_id, (cluster, score) in enumerate(clustered_dupes):
        for record_id in cluster:
            cluster_membership[record_id] = cluster_id

    unique_record_id = cluster_id + 1

    writer = csv.writer(output_file)

    reader = csv.reader(StringIO(input_file))

    heading_row = next(reader)
    heading_row.insert(0, u'Cluster ID')
    writer.writerow(heading_row)

    for row_id, row in enumerate(reader):
        if row_id in cluster_membership:
            cluster_id = cluster_membership[row_id]
        else:
            cluster_id = unique_record_id
            unique_record_id += 1
        row.insert(0, cluster_id)
        writer.writerow(row)
Code Example #20
    def open(self):
        # Cheat the parent open
        self.encoding = 'UTF-8'
        f = Source.open(self)

        csvreader = csv.reader(f, delimiter=u';')
        structureet = [
            u'nofinesset,nofinessej,rs,rslongue,complrs,compldistrib,numvoie,typvoie,voie,compvoie,lieuditbp,commune,departement,libdepartement,ligneacheminement,telephone,telecopie,categetab,libcategetab,categagretab,libcategagretab,siret,codeape,codemft,libmft,codesph,libsph,dateouv,dateautor,datemaj,numuai,coordxet,coordyet,sourcecoordet,datemajcoord'
            .split(',')
        ]
        geolocalisation = {}
        for row in csvreader:
            if row[0] == 'structureet':
                structureet.append(row[1:])
            elif row[0] == 'geolocalisation':
                geolocalisation[row[1]] = row[2:]
        for row in structureet:
            row += geolocalisation.get(row[0], [])

        csvfile = io.StringIO()
        writer = csv.writer(csvfile)
        for row in structureet:
            writer.writerow(row)
        csvfile.seek(0)

        return csvfile
Code Example #21
    def export_to_csv(self, result_list, export_filename="ACRCloud_ScanFile_Results.csv", export_dir="./"):
        try:
            results = []
            for item in result_list:
                filename = item["file"]
                timestamp = item["timestamp"]
                jsoninfo = item["result"]
                if "status" in jsoninfo and jsoninfo["status"]["code"] == 0:
                    row = self.parse_data(jsoninfo)
                    row = [filename, timestamp] + list(row)
                    results.append(row)

            results = sorted(results, key=lambda x: x[1])

            export_filepath = os.path.join(export_dir, export_filename)

            with codecs.open(export_filepath, 'w', 'utf-8-sig') as f:
                head_row = ['filename', 'timestamp',  'custom_files_title', 'custom_acrid', 'title', 'artists', 'album',
                        'acrid', 'played_duration', 'label', 'isrc', 'upc', 'dezzer', 'spotify', 'itunes', 'youtube']
                dw = csv.writer(f)
                dw.writerow(head_row)
                dw.writerows(results)
                if self.debug:
                    self.log.info("export_to_csv.Save Data to csv: {0}".format(export_filename))
        except Exception as e:
            self.log.error("Error@export_to_csv", exc_info=True)
Code Example #22
File: tasks.py Project: jonaqp/Django_Celery_Ejemplo
def write_csv(filename, rows):
    with io.open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["latitude", "longitude", "datetime"])
        for row in rows:
            writer.writerow([row["latitude"], row["longitude"], row["datetime"]])
        f.close()
Code Example #23
 def test_unicode_write(self):
     import io
     with TemporaryFile("w+", newline='', encoding="utf-8") as fileobj:
         writer = csv.writer(fileobj)
         writer.writerow(self.names)
         expected = ",".join(self.names) + "\r\n"
         fileobj.seek(0)
         self.assertEqual(fileobj.read(), expected)
Code Example #24
File: tests.py Project: gitter-badger/backports.csv
 def test_unicode_write(self):
     import io
     with TemporaryFile("w+", newline='', encoding="utf-8") as fileobj:
         writer = csv.writer(fileobj)
         writer.writerow(self.names)
         expected = ",".join(self.names)+"\r\n"
         fileobj.seek(0)
         self.assertEqual(fileobj.read(), expected)
Code Example #25
 def to_csv(table):
     with self.lock, self.conn.begin():
         result = self.conn.execute(select([table]))
         filepath = os.path.join(dirpath, table.name + ".csv")
         with io.open(filepath, mode="w", encoding="utf-8") as csv_file:
             csv_writer = csv.writer(csv_file)
             csv_writer.writerow(table.columns.keys())
             csv_writer.writerows(result)
Code Example #26
File: tests.py Project: ryanhiebert/backports.csv
 def test_roundtrip_escaped_unquoted_newlines(self):
     with TemporaryFile("w+", newline="") as fileobj:
         writer = csv.writer(fileobj, quoting=csv.QUOTE_NONE, escapechar="\\")
         rows = [["a\nb", "b"], ["c", "x\r\nd"]]
         writer.writerows(rows)
         fileobj.seek(0)
         for i, row in enumerate(csv.reader(fileobj, quoting=csv.QUOTE_NONE, escapechar="\\")):
             self.assertEqual(row, rows[i])
Code Example #27
File: tests.py Project: ryanhiebert/backports.csv
    def compare_dialect_123(self, expected, *writeargs, **kwwriteargs):

        with TemporaryFile("w+", newline="", encoding="utf-8") as fileobj:

            writer = csv.writer(fileobj, *writeargs, **kwwriteargs)
            writer.writerow([1, 2, 3])
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), expected)
Code Example #28
    def compare_dialect_123(self, expected, *writeargs, **kwwriteargs):

        with TemporaryFile("w+", newline='', encoding="utf-8") as fileobj:

            writer = csv.writer(fileobj, *writeargs, **kwwriteargs)
            writer.writerow([1, 2, 3])
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), expected)
Code Example #29
 def write_data(self, buf, rows=None, **kwargs):
     if rows is None:
         rows = self.get_rows()
     csv_writer = csv.writer(
         codecs.getwriter(self.get_encoding())(buf, errors='replace'),
         delimiter=self.get_delimiter())
     for row in rows:
         csv_writer.writerow(row)
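The codecs.getwriter call above slots a transcoding shim between the text-producing csv writer and a byte-oriented destination. A self-contained sketch of the same pattern (the buffer and encoding are assumptions):

import codecs
import csv
import io

raw = io.BytesIO()  # stands in for a binary sink such as a socket or compressed file
writer = csv.writer(codecs.getwriter('utf-8')(raw, errors='replace'), delimiter=';')
writer.writerow(['café', 'naïve'])
print(raw.getvalue())  # b'caf\xc3\xa9;na\xc3\xafve\r\n'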
Code Example #30
def format_csv_rows(rows, delimiter=','):
    get_logger().debug('format_csv_rows, rows: %s', rows)
    out = StringIO()
    writer = csv.writer(out, delimiter=text_type(delimiter))
    writer.writerows([[_to_text(x) for x in row] for row in rows])
    result = out.getvalue().rstrip('\r\n')
    get_logger().debug('format_csv_rows, result: %s', result)
    return result
Code Example #31
File: tests.py Project: ryanhiebert/backports.csv
 def test_roundtrip_quoteed_newlines(self):
     with TemporaryFile("w+", newline="") as fileobj:
         writer = csv.writer(fileobj)
         self.assertRaises(TypeError, writer.writerows, None)
         rows = [["a\nb", "b"], ["c", "x\r\nd"]]
         writer.writerows(rows)
         fileobj.seek(0)
         for i, row in enumerate(csv.reader(fileobj)):
             self.assertEqual(row, rows[i])
Code Example #32
File: listio.py Project: jakubvalenta/listio
def _write_csv(file_path,
               data,
               delimiter=DEFAULT_DELIMITER,
               lineterminator=DEFAULT_LINETERMINATOR):
    with io.open(file_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f,
                            delimiter=delimiter,
                            lineterminator=lineterminator)
        writer.writerows(data)
Code Example #33
def save_file_pairs_to_csv(output_path, source_xml_pairs):
    mkdirs_if_not_exists(dirname(output_path))
    delimiter = csv_delimiter_by_filename(output_path)
    mime_type = 'text/tsv' if delimiter == '\t' else 'text/csv'
    with open_file(output_path, 'w', mime_type=mime_type) as f:
        writer = csv.writer(f, delimiter=text_type(delimiter))
        write_csv_rows(writer, [['source_url', 'xml_url']])
        write_csv_rows(writer, source_xml_pairs)
    LOGGER.info('written results to %s', output_path)
Code Example #34
 def test_roundtrip_quoteed_newlines(self):
     with TemporaryFile("w+", newline='') as fileobj:
         writer = csv.writer(fileobj)
         self.assertRaises(TypeError, writer.writerows, None)
         rows = [['a\nb', 'b'], ['c', 'x\r\nd']]
         writer.writerows(rows)
         fileobj.seek(0)
         for i, row in enumerate(csv.reader(fileobj)):
             self.assertEqual(row, rows[i])
Code Example #35
File: tests.py Project: cclauss/backports.csv
 def test_quote_nonnumeric_decimal(self):
     """Decimals should not be quoted with non-numeric quoting."""
     import decimal
     with TemporaryFile('w+', newline='', encoding='utf-8') as fileobj:
         writer = csv.writer(fileobj, quoting=csv.QUOTE_NONNUMERIC)
         writer.writerow([10, 10.0, decimal.Decimal('10.0'), '10.0'])
         expected = '10,10.0,10.0,"10.0"\r\n'
         fileobj.seek(0)
         self.assertEqual(fileobj.read(), expected)
Code Example #36
File: tests.py Project: gitter-badger/backports.csv
 def test_float_write(self):
     import array
     contents = [(20-i)*0.1 for i in range(20)]
     a = array.array(str('f'), contents)
     with TemporaryFile("w+", newline='') as fileobj:
         writer = csv.writer(fileobj, dialect="excel")
         writer.writerow(a)
         expected = ",".join([str(i) for i in a])+"\r\n"
         fileobj.seek(0)
         self.assertEqual(fileobj.read(), expected)
Code Example #37
File: tests.py Project: gitter-badger/backports.csv
    def test_char_write(self):
        import array, string
        a = array.array(str('u'), text_type(string.ascii_letters))

        with TemporaryFile("w+", newline='') as fileobj:
            writer = csv.writer(fileobj, dialect="excel")
            writer.writerow(a)
            expected = ",".join(a)+"\r\n"
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), expected)
Code Example #38
    def test_char_write(self):
        import array, string
        a = array.array(str('u'), text_type(string.ascii_letters))

        with TemporaryFile("w+", newline='') as fileobj:
            writer = csv.writer(fileobj, dialect="excel")
            writer.writerow(a)
            expected = ",".join(a) + "\r\n"
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), expected)
Code Example #39
File: tests.py Project: ryanhiebert/backports.csv
    def test_quote_nonnumeric_decimal(self):
        """Decimals should not be quoted with non-numeric quoting."""
        import decimal

        with TemporaryFile("w+", newline="", encoding="utf-8") as fileobj:
            writer = csv.writer(fileobj, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerow([10, 10.0, decimal.Decimal("10.0"), "10.0"])
            expected = '10,10.0,10.0,"10.0"\r\n'
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), expected)
Code Example #40
 def test_float_write(self):
     import array
     contents = [(20 - i) * 0.1 for i in range(20)]
     a = array.array(str('f'), contents)
     with TemporaryFile("w+", newline='') as fileobj:
         writer = csv.writer(fileobj, dialect="excel")
         writer.writerow(a)
         expected = ",".join([str(i) for i in a]) + "\r\n"
         fileobj.seek(0)
         self.assertEqual(fileobj.read(), expected)
Code Example #41
File: users.py Project: renalreg/radar
    def get(self):
        f = io.StringIO()
        writer = csv.writer(f)

        headers = [
            'User ID',
            'Username',
            'First Name',
            'Last Name',
            'Email',
            'Telephone',
            'Enabled',
            'Admin',
            'Last Login',
            'Last Active',
            'Cohorts',
            'Hospitals',
            'Roles',
        ]
        writer.writerow(headers)

        def get_groups(user, group_type):
            """Comma-separated list of groups."""

            groups = [x.name for x in user.groups if x.type == group_type]
            groups = sorted(groups)
            groups = uniq(groups)
            return ', '.join(groups)

        def get_roles(user):
            """Comma-separated list of roles."""
            roles = [gu.role.name for gu in user.group_users]
            return ', '.join(sorted(set(roles)))

        users = list_users()
        for user in users:
            output = []
            output.append(user.id)
            output.append(user.username)
            output.append(user.first_name)
            output.append(user.last_name)
            output.append(user.email)
            output.append(user.telephone_number)
            output.append(user.is_enabled)
            output.append(user.is_admin)
            output.append(user.last_login_date)
            output.append(user.last_active_date)
            output.append(get_groups(user, GROUP_TYPE.COHORT))
            output.append(get_groups(user, GROUP_TYPE.HOSPITAL))
            output.append(get_roles(user))

            writer.writerow(output)

        return Response(f.getvalue(), content_type='text/csv')
Code Example #42
File: listio.py Project: jakubvalenta/listio
def _write_csv(
        file_path,
        data,
        delimiter=DEFAULT_DELIMITER,
        lineterminator=DEFAULT_LINETERMINATOR):
    with io.open(file_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(
            f,
            delimiter=delimiter,
            lineterminator=lineterminator
        )
        writer.writerows(data)
Code Example #43
    def read(self, read_len):
        if not hasattr(self, 'buffer'):
            with io.StringIO() as out:
                csv.writer(out).writerow(self.headers)
                self.buffer = out.getvalue()

        while self.reader is not None and len(self.buffer) < read_len:
            with io.StringIO() as out:
                writer = csv.writer(out)
                try:
                    for i in range(1000):
                        row = next(self.reader)
                        writer.writerow(row)
                        self.count += 1
                        if self.count % 1000 == 0:
                            print('Loaded %s...' % self.count)
                except StopIteration:
                    self.reader = None
                self.buffer += out.getvalue()

        chunk, self.buffer = self.buffer[:read_len], self.buffer[read_len:]
        return chunk
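The read method above drip-feeds CSV text in fixed-size chunks so the caller can stream it (for example as an HTTP upload body). A simplified, self-contained sketch of the same buffering idea, with illustrative names:

import csv
import io

def csv_chunks(headers, rows, chunk_size=4096):
    """Yield CSV text in bounded chunks without building the whole document."""
    out = io.StringIO()
    writer = csv.writer(out)
    writer.writerow(headers)
    buffer = out.getvalue()
    for row in rows:
        out.seek(0)
        out.truncate()
        writer.writerow(row)
        buffer += out.getvalue()
        while len(buffer) >= chunk_size:
            yield buffer[:chunk_size]
            buffer = buffer[chunk_size:]
    if buffer:
        yield buffer

for chunk in csv_chunks(['id', 'name'], [[1, 'a'], [2, 'b']], chunk_size=8):
    print(repr(chunk))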
Code Example #44
 def test_roundtrip_escaped_unquoted_newlines(self):
     with TemporaryFile("w+", newline='') as fileobj:
         writer = csv.writer(fileobj,
                             quoting=csv.QUOTE_NONE,
                             escapechar="\\")
         rows = [['a\nb', 'b'], ['c', 'x\r\nd']]
         writer.writerows(rows)
         fileobj.seek(0)
         for i, row in enumerate(
                 csv.reader(fileobj,
                            quoting=csv.QUOTE_NONE,
                            escapechar="\\")):
             self.assertEqual(row, rows[i])
Code Example #45
File: table.py Project: kkdang/synapsePythonClient
    def from_list_of_rows(cls, schema, values, filepath=None, etag=None, quoteCharacter='"', escapeCharacter="\\", lineEnd=str(os.linesep), separator=",", linesToSkip=0, includeRowIdAndRowVersion=None, headers=None):

        ## create CSV file
        f = None
        try:
            if not filepath:
                temp_dir = tempfile.mkdtemp()
                filepath = os.path.join(temp_dir, 'table.csv')

            f = io.open(filepath, 'w', encoding='utf-8', newline='')

            writer = csv.writer(f,
                quoting=csv.QUOTE_NONNUMERIC,
                delimiter=separator,
                escapechar=escapeCharacter,
                lineterminator=lineEnd,
                quotechar=quoteCharacter,
                skipinitialspace=linesToSkip)

            ## if we haven't explicitly set columns, try to grab them from
            ## the schema object
            if not headers and "columns_to_store" in schema and schema.columns_to_store is not None:
                headers = [SelectColumn.from_column(col) for col in schema.columns_to_store]

            ## write headers?
            if headers:
                writer.writerow([header.name for header in headers])
                header = True
            else:
                header = False

            ## write row data
            for row in values:
                writer.writerow(row)

        finally:
            if f: f.close()

        return cls(
            schema=schema,
            filepath=filepath,
            etag=etag,
            quoteCharacter=quoteCharacter,
            escapeCharacter=escapeCharacter,
            lineEnd=lineEnd,
            separator=separator,
            header=header,
            headers=headers,
            includeRowIdAndRowVersion=includeRowIdAndRowVersion)
Code Example #46
File: csv.py Project: waveform80/lars
 def __init__(
         self, fileobj, header=False, dialect=CSV_DIALECT, encoding='utf-8',
         **kwargs):
     self.fileobj = fileobj
     self.header = header
     self.dialect = dialect
     self.encoding = encoding
     self.keywords = kwargs
     self.count = 0
     self._first_row = None
     # The csv writer outputs strings so we stick a transcoding shim between
     # the writer and the output object
     self._writer = csv_.writer(
         codecs.getwriter(self.encoding)(self.fileobj),
         dialect=self.dialect, **self.keywords)
Code Example #47
File: views.py Project: Pike/pontoon
def _get_project_strings_csv(project, entities, output):
    """Return a CSV content of all strings and translations for a project and locale.

    The file format looks as follows:

        source, locale_code_1, locale_code_2
        "string A", "tranlation A1", "tranlation A2"
        "string B", "tranlation B1", "tranlation B2"

    The first column has all source strings. Then there is one column per enabled locale, each
    containing available translations for each source string (or an empty cell). The first line
    contains the code of each locale, except for the first cell which is always "source".

    :arg Project project: the project from which to take strings
    :arg list entities: the list of all entities of the project
    :arg buffer output: a buffer to which the CSV writer will send its data

    :returns: the same output object with the CSV data

    """
    locales = Locale.objects.filter(project_locale__project=project)
    translations = (
        Translation.objects
        .filter(
            entity__resource__project=project,
            approved=True,
        )
        .prefetch_related('locale')
        .prefetch_related('entity')
    )
    all_data = dict((x.id, {'source': x.string}) for x in entities)

    for translation in translations:
        all_data[translation.entity.id][translation.locale.code] = translation.string

    writer = csv.writer(output)
    headers = ['source'] + [x.code for x in locales]
    writer.writerow(headers)
    for string in all_data.values():
        row = [string.get(key, '') for key in headers]
        writer.writerow(row)

    return output
Code Example #48
File: csvhelpers.py Project: rlugojr/csvdedupe
def writeLinkedResults(clustered_pairs, input_1, input_2, output_file,
                       inner_join=False):
    logging.info('saving unique results to: %s' % output_file)

    matched_records = []
    seen_1 = set()
    seen_2 = set()

    input_1 = [row for row in csv.reader(StringIO(input_1))]
    row_header = input_1.pop(0)
    length_1 = len(row_header)

    input_2 = [row for row in csv.reader(StringIO(input_2))]
    row_header_2 = input_2.pop(0)
    length_2 = len(row_header_2)
    row_header += row_header_2

    for pair in clustered_pairs:
        index_1, index_2 = [int(index.split('|', 1)[1]) for index in pair[0]]

        matched_records.append(input_1[index_1] + input_2[index_2])
        seen_1.add(index_1)
        seen_2.add(index_2)

    writer = csv.writer(output_file)
    writer.writerow(row_header)

    for matches in matched_records:
        writer.writerow(matches)

    if not inner_join:

        for i, row in enumerate(input_1):
            if i not in seen_1:
                writer.writerow(row + [None] * length_2)

        for i, row in enumerate(input_2):
            if i not in seen_2:
                writer.writerow([None] * length_1 + row)
Code Example #49
def dontruntest_big_csvs():
    cols = []
    cols.append(Column(name='name', columnType='STRING', maximumSize=1000))
    cols.append(Column(name='foo', columnType='STRING', enumValues=['foo', 'bar', 'bat']))
    cols.append(Column(name='x', columnType='DOUBLE'))
    cols.append(Column(name='n', columnType='INTEGER'))
    cols.append(Column(name='is_bogus', columnType='BOOLEAN'))

    schema1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    print("Created table:", schema1.id)
    print("with columns:", schema1.columnIds)

    ## write rows to CSV file
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        schedule_for_cleanup(temp.name)
        filename = temp.name

    with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
        writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
        writer.writerow([col.name for col in cols])

        for i in range(10):
            for j in range(100):
                foo = cols[1].enumValues[random.randint(0,2)]
                writer.writerow(('Robot ' + str(i*100 + j), foo, random.random()*200.0, random.randint(0,100), random.random()>=0.5))
            print("wrote 100 rows to disk")

    ## upload CSV
    UploadToTableResult = syn._uploadCsv(filepath=temp.name, schema=schema1)

    from synapseclient.table import CsvFileTable
    results = CsvFileTable.from_table_query(syn, "select * from %s" % schema1.id)
    print("etag:", results.etag)
    print("tableId:", results.tableId)

    for row in results:
        print(row)
Code Example #50
    def open(self):
        # Cheat the parent open
        encoding, self.encoding = self.encoding, 'UTF-8'
        f = Source.open(self)

        csvreader = csv.reader(f, delimiter=u';')
        structureet = [u'nofinesset,nofinessej,rs,rslongue,complrs,compldistrib,numvoie,typvoie,voie,compvoie,lieuditbp,commune,departement,libdepartement,ligneacheminement,telephone,telecopie,categetab,libcategetab,categagretab,libcategagretab,siret,codeape,codemft,libmft,codesph,libsph,dateouv,dateautor,datemaj,numuai,coordxet,coordyet,sourcecoordet,datemajcoord'.split(',')]
        geolocalisation = {}
        for row in csvreader:
            if row[0] == 'structureet':
                structureet.append(row[1:])
            elif row[0] == 'geolocalisation':
                geolocalisation[row[1]] = row[2:]
        for row in structureet:
            row += geolocalisation.get(row[0], [])

        csvfile = io.StringIO()
        writer = csv.writer(csvfile)
        for row in structureet:
            writer.writerow(row)
        csvfile.seek(0)

        return csvfile
Code Example #51
File: export.py Project: brucewu16899/modoboa
def export_domains(request):
    ctx = {
        "title": _("Export domains"),
        "action_label": _("Export"),
        "action_classes": "submit",
        "formid": "exportform",
        "action": reverse("admin:domain_export"),
    }

    if request.method == "POST":
        form = ExportDomainsForm(request.POST)
        form.is_valid()
        fp = six.StringIO()
        csvwriter = csv.writer(fp, delimiter=form.cleaned_data["sepchar"])
        for dom in get_domains(request.user,
                               **request.session["domains_filters"]):
            dom.to_csv(csvwriter)
        content = fp.getvalue()
        fp.close()
        return _export(content, form.cleaned_data["filename"])

    ctx["form"] = ExportDomainsForm()
    return render(request, "common/generic_modal_form.html", ctx)
Code Example #52
File: zohocsvhtmltomarkdown.py Project: mick-t/misc
import io
import sys

import backports.csv as csv
import html2text

from markdown import markdown

CSV_FILE_NAME = 'forums.csv' # the file to import
NEW_CSV_FILE_NAME = 'forums_markdown.csv' # the file to create

# some content fields are bigger than csv.field_size_limit
csv.field_size_limit(sys.maxsize)
with io.open(CSV_FILE_NAME, 'r') as csvfile, io.open(NEW_CSV_FILE_NAME, 'w') as writecsvfile:
    reader = csv.reader(csvfile, delimiter=u',', quotechar=u'"')
    writer = csv.writer(writecsvfile)
    counter = 0
    for row in reader:
        col_number = 0
        my_row = []
        for col in row:
            if col_number == 3:
                # use the permalink as the file name
                title = col
                
            col_number = col_number + 1
            if col_number == 6:  # & counter != 0:
                # aha, a content field!
                h = html2text.HTML2Text()
                markdown_col = h.handle(col)
                my_row.append(markdown_col)
Code Example #53
File: patients.py Project: renalreg/radar
    def get(self):
        f = io.StringIO()
        writer = csv.writer(f)
        args = parse_args(PatientListRequestSerializer)

        cohorts = [i for i in args['group'] if i.type == GROUP_TYPE.COHORT]

        headers = [
            'Patient ID',
            'First Name',
            'Last Name',
            'Date of Birth',
            'Year of Birth',
            'Date of Death',
            'Year of Death',
            'Gender',
            'Gender Label',
            'Ethnicity',
            'Ethnicity Label',
            'Patient Number',
            'PV',
            'Recruited On',
            'Recruited Group Name',
            'Recruited Group Code',
            'Cohorts',
            'Hospitals',
        ]
        for cohort in cohorts:
            headers.append(cohort.short_name)

        writer.writerow(headers)

        def get_groups(patient, group_type):
            """Comma-separated list of groups."""

            groups = [x.name for x in patient.current_groups if x.type == group_type]
            groups = sorted(groups)
            groups = uniq(groups)
            return ', '.join(groups)

        patients = list_patients()

        for patient in patients:
            # Wrap the patient so demographics aren't exposed to unprivileged users
            patient = SkipProxy(PatientProxy(patient, current_user))

            output = []
            output.append(patient.id)
            output.append(patient.first_name)
            output.append(patient.last_name)
            output.append(patient.date_of_birth)
            output.append(patient.year_of_birth)
            output.append(patient.date_of_death)
            output.append(patient.year_of_death)
            output.append(patient.gender)
            output.append(patient.gender_label)
            output.append(patient.available_ethnicity)
            output.append(patient.ethnicity_label)
            output.append(get_attrs(patient, 'primary_patient_number', 'number'))
            output.append('Y' if patient.ukrdc else 'N')
            output.append(patient.recruited_date())
            output.append(get_attrs(patient.recruited_group(), 'name'))
            output.append(get_attrs(patient.recruited_group(), 'code'))
            output.append(get_groups(patient, GROUP_TYPE.COHORT))
            output.append(get_groups(patient, GROUP_TYPE.HOSPITAL))

            for cohort in cohorts:
                output.append(patient.recruited_date(cohort))

            writer.writerow(output)

        return Response(f.getvalue(), content_type='text/csv')
Code Example #54
def list2file(string_list, filepath):
    with io.open(filepath, 'w') as csvfile:
        writer = csv.writer(csvfile, doublequote=True, quoting=csv.QUOTE_MINIMAL)
        for string in string_list:
            writer.writerow([string])
Code Example #55
def print_csv(field_items, fileobj=sys.stdout, fields=None):
    writer = csv.writer(codecs.getwriter('utf8')(fileobj))
    fields = field_items['fields'] if fields is None else fields
    writer.writerow(fields)
    for row in field_items['items']:
        writer.writerow([row[field] for field in fields])
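The codecs.getwriter('utf8') wrapper above is a Python 2 idiom for encoding output on the fly; on Python 3, sys.stdout is already a text stream and the writer can target it directly. A minimal sketch with invented field data:

import csv
import sys

field_items = {'fields': ['id', 'name'],
               'items': [{'id': 1, 'name': 'alpha'}, {'id': 2, 'name': 'beta'}]}
writer = csv.writer(sys.stdout)
writer.writerow(field_items['fields'])
for row in field_items['items']:
    writer.writerow([row[field] for field in field_items['fields']])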
Code Example #56
File: tests.py Project: ryanhiebert/backports.csv
 def writerAssertEqual(self, input, expected_result):
     with TemporaryFile("w+", newline="") as fileobj:
         writer = csv.writer(fileobj, dialect=self.dialect)
         writer.writerows(input)
         fileobj.seek(0)
         self.assertEqual(fileobj.read(), expected_result)
Code Example #57
def test_csv_table():
    # Maybe not truly a unit test, but here because it doesn't do
    # network IO to synapse
    data = [["1", "1", "John Coltrane",  1926, 8.65, False],
            ["2", "1", "Miles Davis",    1926, 9.87, False],
            ["3", "1", "Bill Evans",     1929, 7.65, False],
            ["4", "1", "Paul Chambers",  1935, 5.14, False],
            ["5", "1", "Jimmy Cobb",     1929, 5.78, True],
            ["6", "1", "Scott LaFaro",   1936, 4.21, False],
            ["7", "1", "Sonny Rollins",  1930, 8.99, True],
            ["8", "1", "Kenny Burrel",   1931, 4.37, True]]

    filename = None

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]

    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        # create CSV file
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert_is_instance(table, CsvFileTable)

        # need to set column headers to read a CSV file
        table.setColumnHeaders(
            [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING")] +
            [SelectColumn.from_column(col) for col in cols])

        # test iterator
        for table_row, expected_row in zip(table, data):
            assert_equals(table_row, expected_row)

        # test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert_equals(rowset_row['values'], expected_row[2:])
            assert_equals(rowset_row['rowId'], expected_row[0])
            assert_equals(rowset_row['versionNumber'], expected_row[1])

        df = table.asDataFrame()
        assert_equals(list(df['Name']), [row[2] for row in data])
        assert_equals(list(df['Born']), [row[3] for row in data])
        assert_equals(list(df['Living']), [row[5] for row in data])
        assert_equals(list(df.index), ['%s_%s' % tuple(row[0:2]) for row in data])
        assert_equals(df.shape, (8, 4))

    except Exception:
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)
        raise
Code Example #58
File: tests.py Project: ryanhiebert/backports.csv
 def _write_error_test(self, exc, fields, **kwargs):
     with TemporaryFile("w+", newline="") as fileobj:
         writer = csv.writer(fileobj, **kwargs)
         self.assertRaises(exc, writer.writerow, fields)
         fileobj.seek(0)
         self.assertEqual(fileobj.read(), "")