def bulk_import_gs_as_biographies_activities(self, url=None, owner=None, sheet=None, **options):
    """
    usage:
    python -W ignore manage.py task bulk_import_gs_as_biographies_activities --url=<your url> --sheet=activities
    """
    rows, headers = utils.bulk_import_gs(url=url, sheet=sheet, use_cache=options['cache'], required_headers=['person_slug'])

    # group rows by person_slug: each person gets the list of its activity rows
    people = pyd.group_by(rows, 'person_slug')

    data_paths = utils.data_paths(headers=headers)

    # basic data structure based on the headers columns
    data_structure = {}
    for i, path, is_list in data_paths:
        utils.nested_set(data_structure, path, {})

    def mapper(d):
        # flatten one spreadsheet row into a single activity dict
        _d = {
            'sorting': pyd.get(d, u'data__activity__sorting', ''),
            'type': pyd.get(d, u'data__activity__type', ''),
            'description': {
                'en_US': pyd.get(d, u'data__activity__description__en_US', ''),
                'fr_FR': pyd.get(d, u'data__activity__description__fr_FR', '')
            },
            'date': {
                'en_US': pyd.get(d, u'data__activity__date__en_US', ''),
                'fr_FR': pyd.get(d, u'data__activity__date__fr_FR', '')
            },
            'start_date': pyd.get(d, u'data__activity__start_date'),
            'end_date': pyd.get(d, u'data__activity__end_date')
        }
        return _d

    # attach the mapped activities to the matching 'entity' document
    for slug, activities in people.iteritems():
        logger.debug('adding %s activities to document {slug:%s}' % (len(activities), slug))
        doc = Document.objects.get(slug=slug, type=Document.ENTITY)
        doc.data['activities'] = map(mapper, activities)
        doc.save()
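
# Illustration only -- a hedged sketch of what mapper() above produces for one
# spreadsheet row, assuming the data__activity__* headers read there (the
# person_slug and values are hypothetical):
#
#   row = {
#       'person_slug': 'jane-doe',
#       'data__activity__sorting': '1',
#       'data__activity__type': 'occupation',
#       'data__activity__description__en_US': 'Lecturer in history',
#       'data__activity__description__fr_FR': 'Chargee de cours en histoire',
#       'data__activity__date__en_US': '1968-1972',
#       'data__activity__date__fr_FR': '1968-1972',
#       'data__activity__start_date': '1968',
#       'data__activity__end_date': '1972',
#   }
#   mapper(row)
#   # => {'sorting': '1', 'type': 'occupation',
#   #     'description': {'en_US': 'Lecturer in history', 'fr_FR': 'Chargee de cours en histoire'},
#   #     'date': {'en_US': '1968-1972', 'fr_FR': '1968-1972'},
#   #     'start_date': '1968', 'end_date': '1972'}
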
def bulk_import_gs_as_tags(self, url=None, sheet=None, use_cache=False, **options):
    if not url:
        raise Exception('please specify a google spreadsheet url with the --url parameter')

    logger.debug('loading %s' % url)

    rows, headers = utils.bulk_import_gs(url=url, sheet=sheet, use_cache=use_cache, required_headers=['slug', 'category', 'data__provider'])

    data_paths = utils.data_paths(headers=headers)

    data_structure = {}
    for i, path, is_list in data_paths:
        utils.nested_set(data_structure, path, {})

    logger.debug('data__* fields have been transformed to: %s' % data_structure)

    CATEGORIES = [item[0] for item in Tag.CATEGORY_CHOICES]
    logger.debug('categories available: %s' % CATEGORIES)

    for i, row in enumerate(rows):
        if not row['slug']:
            logger.debug('line %s: empty "slug", skipping.' % i)
            continue

        if not row['category'] or row['category'] not in CATEGORIES:
            logger.debug('line %s: category "%s" not matching %s, skipping.' % (i, row['category'], CATEGORIES))
            continue

        _category = row['category'].strip()
        _slug = row['slug'].strip()

        if len(_slug) > 100:
            logger.debug('line %s: "slug" length is excessive, BREAK!' % i)
            break

        # this will raise an error if the tag already exists under a different category
        # (changing the category is not permitted here)
        tag, created = Tag.objects.get_or_create(slug=_slug, category=_category)
        tag.name = row.get('name', '').strip()

        # fill the nested data dict from the data__* columns
        _data = data_structure.copy()
        for key, path, is_list in data_paths:
            utils.nested_set(_data, path, row[key], as_list=is_list)
        tag.data = _data['data']

        logger.debug('saving tag {slug:%s, created:%s, name:%s}' % (tag.slug, created, tag.name))
        tag.save()
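
# Usage sketch (hedged): this task follows the invocation pattern documented in the
# other bulk_import_gs_* tasks; the sheet name depends on your spreadsheet.
#
#   python -W ignore manage.py task bulk_import_gs_as_tags --url=<your url> --sheet=<your sheet>
#
# Illustration only -- assuming utils.data_paths()/utils.nested_set() expand the
# data__* columns into nested dicts as the debug message above suggests
# (hypothetical values):
#
#   headers: ['slug', 'category', 'name', 'data__provider']
#   row:     {'slug': 'cvce', 'category': 'keyword', 'name': 'CVCE', 'data__provider': 'unesco'}
#   _data:   {'data': {'provider': 'unesco'}}   # stored as tag.data
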
def bulk_import_gs_as_documents(self, url=None, sheet=None, use_cache=False, **options):
    if not url:
        raise Exception('please specify a google spreadsheet url with the --url parameter')

    logger.debug('loading %s' % url)

    rows, headers = utils.bulk_import_gs(url=url, sheet=sheet, use_cache=use_cache, required_headers=['slug', 'type'])

    # owner is the first staff user
    owner = Profile.objects.filter(user__is_staff=True).first()
    if not owner:
        raise Exception('no Profile object defined in the database!')

    data_paths = utils.data_paths(headers=headers)
    logger.debug('data paths: %s' % data_paths)

    # basic data structure based on the headers columns
    data_structure = {}
    for i, path, is_list in data_paths:
        utils.nested_set(data_structure, path, {})

    logger.debug('data__* fields have been transformed to: %s' % data_structure)

    for i, row in enumerate(rows):
        if not row['slug'] or not row['type']:
            logger.debug('line %s: empty "slug" or empty "type", skipping.' % i)
            continue

        _slug = re.sub(r'\s', '', row['slug'])
        _type = row['type'].strip()

        doc, created = Document.objects.get_or_create(slug=_slug, type=_type, defaults={'owner': owner.user})
        doc.title = row['title'].strip()

        # fill the nested data dict from the data__* columns, then merge it into doc.data
        _data = data_structure.copy()
        for key, path, is_list in data_paths:
            utils.nested_set(_data, path, row[key], as_list=is_list)
        doc.data.update(_data['data'])

        # the attachment path must be relative to MEDIA_ROOT
        if 'attachment' in row and len(row['attachment'].strip()) > 0:
            doc.attachment.name = row['attachment']

        doc.save()
        logger.debug('line %(line)s: document saved {pk:%(pk)s, type:%(type)s, slug:%(slug)s, created:%(created)s}' % {
            'line': i,
            'slug': _slug,
            'type': _type,
            'pk': doc.pk,
            'created': created
        })
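
# Usage sketch (hedged): same invocation pattern as the tasks documented above.
#
#   python -W ignore manage.py task bulk_import_gs_as_documents --url=<your url> --sheet=<your sheet>
#
# Illustration only -- how a row maps onto a Document (hypothetical values; note
# that whitespace is stripped from the slug and the attachment path is taken
# as-is, relative to MEDIA_ROOT):
#
#   row:  {'slug': 'jane doe', 'type': 'entity', 'title': 'Jane Doe',
#          'attachment': 'documents/jane-doe.jpg', 'data__provider': 'archive'}
#   doc:  slug='janedoe', type='entity', title='Jane Doe',
#         attachment.name='documents/jane-doe.jpg', data updated with {'provider': 'archive'}
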
def bulk_import_gs_as_biographies(self, url=None, owner=None, sheet=None, **options):
    """
    usage:
    python -W ignore manage.py task bulk_import_gs_as_biographies --owner=<your username> --url=<your url> --sheet=people
    """
    logger.debug('loading %s' % url)

    owner = Profile.objects.filter(user__username=owner).first()
    if not owner:
        raise Exception('specify a **valid** miller username with the --owner parameter.')
    logger.debug('with owner: %s' % owner)

    biotag = Tag.objects.get(category=Tag.WRITING, slug='biography')

    rows, headers = utils.bulk_import_gs(url=url, sheet=sheet, use_cache=options['cache'], required_headers=['slug', 'type'])

    # create one story per row, each covered by a related 'entity' document
    logger.debug('saving stories with related document...')

    for i, row in enumerate(rows):
        if not row['slug'] or not row['type'] or not row['title']:
            logger.debug('line %s: empty "slug", "type" or "title", skipping.' % i)
            continue

        _slug = row.get('slug', '')
        _type = row.get('type', '')

        story, created = Story.objects.get_or_create(slug=_slug, defaults={
            'owner': owner.user,
            'title': row.get('title', '')
        })

        if not story.title:
            story.title = row.get('title', '')

        if not story.abstract:
            # fall back to the english description plus the (birth)date
            story.abstract = '\n'.join(filter(None, [
                row.get('data__description__en_US', ''),
                row.get('data__date__en_US', '')
            ])).strip()

        story.data.update({
            'title': {
                'en_US': '\n'.join(filter(None, [row.get('data__title__en_US', '')])),
                'fr_FR': '\n'.join(filter(None, [row.get('data__title__fr_FR', '')]))
            },
            'abstract': {
                'en_US': '\n'.join(filter(None, [row.get('data__description__en_US', '')])).strip(),
                'fr_FR': '\n'.join(filter(None, [row.get('data__description__fr_FR', '')])).strip()
            }
        })

        # plain 'title' and english description act as fallbacks for missing localized cells
        if not story.data.get('title').get('en_US', None):
            story.data['title']['en_US'] = row.get('title', '')
        if not story.data.get('abstract').get('en_US', None):
            story.data['abstract']['en_US'] = row.get('data__description__en_US', '')

        # create or get the 'entity' document used as story cover
        doc, dcreated = Document.objects.get_or_create(slug=_slug, type=Document.ENTITY, defaults={
            'owner': owner.user
        })
        if not doc.title:
            doc.title = row['title'].strip()
        logger.debug('- with cover document {slug:%s, type:%s, created:%s}' % (doc.slug, doc.type, dcreated))

        story.tags.add(biotag)
        story.covers.add(doc)
        if dcreated:
            story.authors.add(owner.user.authorship.first())

        story.save()
        logger.debug('ok - story {slug:%s, created:%s} saved' % (story.slug, created))

    # second pass: fill the entity documents' data from the same sheet
    logger.debug('updating document data...')
    self.bulk_import_gs_as_documents(url=url, sheet=sheet, use_cache=True, required_headers=['slug', 'type'])
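
# Flow note (hedged reading of the code above): the first pass creates one Story
# plus one 'entity' cover Document per row of the people sheet; the final call to
# bulk_import_gs_as_documents re-reads the same sheet -- with use_cache=True,
# presumably to reuse the copy just downloaded -- and fills each document's data
# from the data__* columns. The resulting story.data looks roughly like this
# (hypothetical values):
#
#   {'title':    {'en_US': 'Jane Doe', 'fr_FR': 'Jane Doe'},
#    'abstract': {'en_US': 'Lecturer in history.', 'fr_FR': 'Chargee de cours en histoire.'}}
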