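# NOTE: the method below is an excerpt from a larger module and relies on
# several module-level names that are not shown here. The definitions in this
# block are illustrative assumptions added so the excerpt reads on its own --
# the actual imports, regex pattern, helper, and batch sizes in the library
# may differ.
import logging
import re
from csv import writer
from datetime import date

from django.db.models import ManyToManyField
from six import StringIO, text_type

logger = logging.getLogger(__name__)

# Assumed: matches point-backed pseudo-columns such as 'point[0]' / 'point[1]'
re_point = re.compile(r'(point)\[(\d)\]')

# Assumed tuning knobs for batched writing of large tables
batch_size = 1000
large_queryset_size = 100000


def write_text_rows(csv_writer, rows):
    '''Assumed helper: write rows, coercing every value to text.'''
    for row in rows:
        csv_writer.writerow([text_type(value) for value in row])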
def export_txt(cls, feed):
    '''Export records as a GTFS comma-separated file'''
    objects = cls.objects.in_feed(feed)

    # If no records, return None
    if not objects.exists():
        return

    # Get the columns used in the dataset
    column_map = objects.populated_column_map()
    columns, fields = zip(*column_map)
    extra_columns = feed.meta.get('extra_columns', {}).get(cls.__name__, [])

    # Get sort order
    if hasattr(cls, '_sort_order'):
        sort_fields = cls._sort_order
    else:
        sort_fields = []
        for field in fields:
            base_field = field.split('__', 1)[0]
            point_match = re_point.match(base_field)
            if point_match:
                continue
            field_type = cls._meta.get_field(base_field)
            assert not isinstance(field_type, ManyToManyField)
            sort_fields.append(field)

    # Create CSV writer
    out = StringIO()
    csv_writer = writer(out, lineterminator='\n')

    # Write header row
    header_row = [text_type(c) for c in columns]
    header_row.extend(extra_columns)
    write_text_rows(csv_writer, [header_row])

    # Report the work to be done
    total = objects.count()
    logger.info('%d %s to export...', total, cls._meta.verbose_name_plural)

    # Populate related items cache
    model_to_field_name = {}
    cache = {}
    for field_name in fields:
        if '__' in field_name:
            local_field_name, subfield_name = field_name.split('__', 1)
            field = cls._meta.get_field(local_field_name)
            field_type = field.related_model
            model_name = field_type.__name__
            if model_name in model_to_field_name:
                # Already loaded this model under a different field name
                cache[field_name] = cache[model_to_field_name[model_name]]
            else:
                # Load all feed data for this model
                pairs = field_type.objects.in_feed(feed).values_list(
                    'id', subfield_name)
                cache[field_name] = dict(
                    (i, text_type(x)) for i, x in pairs)
                cache[field_name][None] = u''
                model_to_field_name[model_name] = field_name

    # For large querysets, break up by the first field
    if total < large_queryset_size:
        querysets = [objects.order_by(*sort_fields)]
    else:  # pragma: no cover
        field1_raw = sort_fields[0]
        assert '__' in field1_raw
        assert field1_raw in cache
        field1 = field1_raw.split('__', 1)[0]
        field1_id = field1 + '_id'

        # Sort field1 ids by field1 values
        val_to_id = dict((v, k) for k, v in cache[field1_raw].items())
        assert len(val_to_id) == len(cache[field1_raw])
        sorted_vals = sorted(val_to_id.keys())

        querysets = []
        for val in sorted_vals:
            fid = val_to_id[val]
            if fid:
                qs = objects.filter(**{
                    field1_id: fid
                }).order_by(*sort_fields[1:])
                querysets.append(qs)

    # Assemble the rows, writing when we hit batch size
    count = 0
    rows = []
    for queryset in querysets:
        for item in queryset.order_by(*sort_fields):
            row = []
            for csv_name, field_name in column_map:
                obj = item
                point_match = re_point.match(field_name)
                if '__' in field_name:
                    # Return relations from cache
                    local_field_name = field_name.split('__', 1)[0]
                    field_id = getattr(obj, local_field_name + '_id')
                    row.append(cache[field_name][field_id])
                elif point_match:
                    # Get the lat or long from the point
                    name, index = point_match.groups()
                    field = getattr(obj, name)
                    row.append(field.coords[int(index)])
                else:
                    # Handle other field types
                    field = getattr(obj, field_name) if obj else ''
                    if isinstance(field, date):
                        formatted = field.strftime(u'%Y%m%d')
                        row.append(text_type(formatted))
                    elif isinstance(field, bool):
                        row.append(1 if field else 0)
                    elif field is None:
                        row.append(u'')
                    else:
                        row.append(text_type(field))
            for col in extra_columns:
                row.append(obj.extra_data.get(col, u''))
            rows.append(row)
            if len(rows) % batch_size == 0:  # pragma: no cover
                write_text_rows(csv_writer, rows)
                count += len(rows)
                logger.info("Exported %d %s", count,
                            cls._meta.verbose_name_plural)
                rows = []

    # Write rows smaller than batch size
    write_text_rows(csv_writer, rows)
    return out.getvalue()
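# Usage sketch (illustrative; the names below are assumptions, not part of
# this module). export_txt is written to be used as a classmethod on a
# concrete GTFS model -- the @classmethod decorator is expected in the full
# class definition. A caller might export one table of a feed like this:
def _example_export_routes(feed):
    '''Write the routes table of ``feed`` to routes.txt, if it has any rows.'''
    from multigtfs.models import Route  # assumed concrete model

    text = Route.export_txt(feed)  # CSV text, or None when the table is empty
    if text is not None:
        with open('routes.txt', 'w') as handle:
            handle.write(text)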