def set_node_many_to_many_on_users(page_size=5000): print 'Starting {}...'.format(sys._getframe().f_code.co_name) user_count = 0 m2m_count = 0 start = datetime.now() total = MODMUser.find(build_query(m2m_node_fields, MODMUser)).count() print '{} Users'.format(total) while user_count < total: with transaction.atomic(): for modm_user in MODMUser.find(build_query( m2m_node_fields, MODMUser)).sort('-date_registered')[ user_count:page_size + user_count]: django_user = User.objects.get( pk=modm_to_django[modm_user._id]) for m2m_node_field in m2m_node_fields: try: attr = getattr(django_user, m2m_node_field) except AttributeError as ex: # node field doesn't exist on user pass else: # node field exists, do the stuff django_pks = [] for modm_m2m_value in getattr(modm_user, m2m_node_field, []): if isinstance(modm_m2m_value, MODMNode): django_pks.append(modm_to_django[ modm_m2m_value._id]) elif isinstance(modm_m2m_value, basestring): django_pks.append(modm_to_django[ modm_m2m_value]) elif isinstance(modm_m2m_value, Pointer): django_pks.append(modm_to_django[ modm_m2m_value.node._id]) else: # wth print '\a' # bells! print '\a' print '\a' print '\a' print '\a' print '\a' print '\a' import bpdb bpdb.set_trace() if len(django_pks) > 0: attr.add(*django_pks) m2m_count += len(django_pks) user_count += 1 if user_count % page_size == 0 or user_count == total: print 'Through {} users and {} m2m'.format(user_count, m2m_count) print 'Done with {} in {} seconds...'.format( sys._getframe().f_code.co_name, (datetime.now() - start).total_seconds())
def set_node_many_to_many_on_users(page_size=5000): print 'Starting {}...'.format(sys._getframe().f_code.co_name) user_count = 0 m2m_count = 0 start = datetime.now() total = MODMUser.find(build_query(m2m_node_fields, MODMUser)).count() print '{} Users'.format(total) while user_count < total: with transaction.atomic(): for modm_user in MODMUser.find( build_query(m2m_node_fields, MODMUser)).sort( '-date_registered')[user_count:page_size + user_count]: django_user = User.objects.get( pk=modm_to_django[modm_user._id]) for m2m_node_field in m2m_node_fields: try: attr = getattr(django_user, m2m_node_field) except AttributeError as ex: # node field doesn't exist on user pass else: # node field exists, do the stuff django_pks = [] for modm_m2m_value in getattr(modm_user, m2m_node_field, []): if isinstance(modm_m2m_value, MODMNode): django_pks.append( modm_to_django[modm_m2m_value._id]) elif isinstance(modm_m2m_value, basestring): django_pks.append( modm_to_django[modm_m2m_value]) elif isinstance(modm_m2m_value, Pointer): django_pks.append( modm_to_django[modm_m2m_value.node._id]) else: # wth print '\a' # bells! print '\a' print '\a' print '\a' print '\a' print '\a' print '\a' import bpdb bpdb.set_trace() if len(django_pks) > 0: attr.add(*django_pks) m2m_count += len(django_pks) user_count += 1 if user_count % page_size == 0 or user_count == total: print 'Through {} users and {} m2m'.format( user_count, m2m_count) print 'Done with {} in {} seconds...'.format( sys._getframe().f_code.co_name, (datetime.now() - start).total_seconds())
def main(): total = MODMUser.find().count() count = 0 page_size = 1000 while count < total: modm_users = MODMUser.find()[count:count + page_size] for modm_user in modm_users: django_user = get_or_create_user(modm_user) count += 1 print 'Count: {}'.format(count)
def save_bare_users(page_size=20000): print 'Starting {}...'.format(sys._getframe().f_code.co_name) count = 0 start = datetime.now() total = MODMUser.find().count() while count < total: with transaction.atomic(): users = [] for modm_user in MODMUser.find().sort( '-date_registered')[count:count + page_size]: guid = Guid.objects.get(guid=modm_user._id) user_fields = dict(_guid_id=guid.pk, **modm_user.to_storage()) cleaned_user_fields = { key: user_fields[key] for key in user_fields if key not in user_key_blacklist } for k, v in cleaned_user_fields.iteritems(): if isinstance(v, datetime): cleaned_user_fields[k] = pytz.utc.localize(v) cleaned_user_fields = { k: v for k, v in cleaned_user_fields.iteritems() if v is not None } users.append(User(**cleaned_user_fields)) count += 1 if count % page_size == 0 or count == total: then = datetime.now() print 'Saving users {} through {}...'.format( count - page_size, count) woot = User.objects.bulk_create(users) for wit in woot: modm_to_django[wit._guid.guid] = wit.pk now = datetime.now() print 'Done with {} users in {} seconds...'.format( len(woot), (now - then).total_seconds()) users = None woot = None guid = None user_fields = None cleaned_user_fields = None trash = gc.collect() print 'Took out {} trashes'.format(trash) print 'Modm Users: {}'.format(total) print 'django Users: {}'.format(User.objects.all().count()) print 'Done with {} in {} seconds...'.format( sys._getframe().f_code.co_name, (datetime.now() - start).total_seconds())
def save_bare_users(page_size=20000): print 'Starting {}...'.format(sys._getframe().f_code.co_name) count = 0 start = datetime.now() total = MODMUser.find().count() while count < total: with transaction.atomic(): users = [] for modm_user in MODMUser.find().sort('-date_registered')[ count:count + page_size]: guid = Guid.objects.get(guid=modm_user._id) user_fields = dict(_guid_id=guid.pk, **modm_user.to_storage()) cleaned_user_fields = {key: user_fields[key] for key in user_fields if key not in user_key_blacklist} for k, v in cleaned_user_fields.iteritems(): if isinstance(v, datetime): cleaned_user_fields[k] = pytz.utc.localize(v) cleaned_user_fields = {k: v for k, v in cleaned_user_fields.iteritems() if v is not None} users.append(User(**cleaned_user_fields)) count += 1 if count % page_size == 0 or count == total: then = datetime.now() print 'Saving users {} through {}...'.format( count - page_size, count) woot = User.objects.bulk_create(users) for wit in woot: modm_to_django[wit._guid.guid] = wit.pk now = datetime.now() print 'Done with {} users in {} seconds...'.format( len(woot), (now - then).total_seconds()) users = None woot = None guid = None user_fields = None cleaned_user_fields = None trash = gc.collect() print 'Took out {} trashes'.format(trash) print 'Modm Users: {}'.format(total) print 'django Users: {}'.format(User.objects.all().count()) print 'Done with {} in {} seconds...'.format( sys._getframe().f_code.co_name, (datetime.now() - start).total_seconds())
def save_bare_system_tags(page_size=10000): print 'Starting save_bare_system_tags...' start = datetime.now() things = list(MODMNode.find(MQ( 'system_tags', 'ne', [])).sort('-_id')) + list( MODMUser.find(MQ('system_tags', 'ne', [])).sort('-_id')) system_tag_ids = [] for thing in things: for system_tag in thing.system_tags: system_tag_ids.append(system_tag) unique_system_tag_ids = set(system_tag_ids) total = len(unique_system_tag_ids) system_tags = [] for system_tag_id in unique_system_tag_ids: system_tags.append( Tag(_id=system_tag_id, lower=system_tag_id.lower(), system=True)) woot = Tag.objects.bulk_create(system_tags) print 'MODM System Tags: {}'.format(total) print 'django system tags: {}'.format( Tag.objects.filter(system=True).count()) print 'Done with {} in {} seconds...'.format( sys._getframe().f_code.co_name, (datetime.now() - start).total_seconds())
def save_bare_system_tags(page_size=10000): print 'Starting save_bare_system_tags...' start = datetime.now() things = list(MODMNode.find(MQ('system_tags', 'ne', [])).sort( '-_id')) + list(MODMUser.find(MQ('system_tags', 'ne', [])).sort( '-_id')) system_tag_ids = [] for thing in things: for system_tag in thing.system_tags: system_tag_ids.append(system_tag) unique_system_tag_ids = set(system_tag_ids) total = len(unique_system_tag_ids) system_tags = [] for system_tag_id in unique_system_tag_ids: system_tags.append(Tag(_id=system_tag_id, lower=system_tag_id.lower(), system=True)) woot = Tag.objects.bulk_create(system_tags) print 'MODM System Tags: {}'.format(total) print 'django system tags: {}'.format(Tag.objects.filter(system= True).count()) print 'Done with {} in {} seconds...'.format( sys._getframe().f_code.co_name, (datetime.now() - start).total_seconds())
def migrate_users(index):
    """Send every active user to ``search.update_user`` for ``index``.

    Iterates all users from ``User.find()``, updates the search entry of
    those whose ``is_active`` is truthy, then logs how many users were
    iterated and how many were migrated.

    :param index: passed through as ``index=`` to ``search.update_user``.
    """
    logger.info("Migrating users to index: {}".format(index))
    migrated = 0
    seen = 0
    for seen, user in enumerate(User.find(), 1):
        if not user.is_active:
            continue
        search.update_user(user, index=index)
        migrated += 1
    summary = 'Users iterated: {0}\nUsers migrated: {1}'.format(
        seen, migrated)
    logger.info(summary)
def migrate_users(index):
    """Send every active user to ``search.update_user`` for ``index``.

    Iterates all users from ``User.find()``, updates the search entry of
    those whose ``is_active`` is truthy, then logs how many users were
    iterated and how many were migrated.

    :param index: passed through as ``index=`` to ``search.update_user``.
    """
    logger.info("Migrating users to index: {}".format(index))
    migrated = 0
    seen = 0
    for seen, user in enumerate(User.find(), 1):
        if not user.is_active:
            continue
        search.update_user(user, index=index)
        migrated += 1
    summary = 'Users iterated: {0}\nUsers migrated: {1}'.format(
        seen, migrated)
    logger.info(summary)
def find_inactive_users_with_no_inactivity_email_sent_or_queued():
    """Return inactive users that have no ``NO_LOGIN_TYPE`` mail yet.

    A user qualifies when ``date_last_login`` is older than
    ``settings.NO_LOGIN_WAIT_TIME`` and ``system_tags`` does not match
    ``'osf4m'``, or older than ``settings.NO_LOGIN_OSF4M_WAIT_TIME`` and
    ``system_tags`` matches ``'osf4m'``. Users whose ``is_active`` is falsy,
    and users referenced by an existing ``QueuedMail`` of type
    ``mails.NO_LOGIN_TYPE``, are excluded.

    :return: list of ``User`` objects loaded by id.
    """
    now = datetime.utcnow()
    # The field name comes first in a query: ``system_tags`` is the field and
    # ``'osf4m'`` the value. They were previously swapped, so the query was
    # filtering on a field called ``osf4m``.
    inactive_users = User.find(
        (Q('date_last_login', 'lt', now - settings.NO_LOGIN_WAIT_TIME) &
         Q('system_tags', 'ne', 'osf4m')) |
        (Q('date_last_login', 'lt', now - settings.NO_LOGIN_OSF4M_WAIT_TIME) &
         Q('system_tags', 'eq', 'osf4m'))
    )
    inactive_emails = mails.QueuedMail.find(
        Q('email_type', 'eq', mails.NO_LOGIN_TYPE))

    # Compare by _id rather than by User object: equality between User
    # instances fails on datetime fields due to pymongo rounding.
    users_sent_id = [email.user._id for email in inactive_emails]
    inactive_ids = [user._id for user in inactive_users if user.is_active]
    pending_ids = set(inactive_ids) - set(users_sent_id)
    return [User.load(user_id) for user_id in pending_ids]
def find_inactive_users_with_no_inactivity_email_sent_or_queued():
    """Return inactive users that have no ``NO_LOGIN_TYPE`` mail yet.

    A user qualifies when ``date_last_login`` is older than
    ``settings.NO_LOGIN_WAIT_TIME`` and ``tags__name`` is not ``'osf4m'``, or
    older than ``settings.NO_LOGIN_OSF4M_WAIT_TIME`` and ``tags__name`` is
    ``'osf4m'``. Users whose ``is_active`` is falsy, and users referenced by
    an existing ``QueuedMail`` of type ``mails.NO_LOGIN_TYPE``, are excluded.

    :return: list of ``User`` objects loaded by id.
    """
    regular_cutoff = timezone.now() - settings.NO_LOGIN_WAIT_TIME
    osf4m_cutoff = timezone.now() - settings.NO_LOGIN_OSF4M_WAIT_TIME

    regular_inactive = (Q('date_last_login', 'lt', regular_cutoff) &
                        Q('tags__name', 'ne', 'osf4m'))
    osf4m_inactive = (Q('date_last_login', 'lt', osf4m_cutoff) &
                      Q('tags__name', 'eq', 'osf4m'))
    inactive_users = User.find(regular_inactive | osf4m_inactive)
    inactive_emails = mails.QueuedMail.find(
        Q('email_type', 'eq', mails.NO_LOGIN_TYPE))

    # Work with _id values instead of User objects: comparing User instances
    # fails on datetime fields because of pymongo rounding.
    already_mailed = {email.user._id for email in inactive_emails}
    candidates = {user._id for user in inactive_users if user.is_active}
    return [User.load(user_id) for user_id in candidates - already_mailed]
def set_user_foreign_keys_on_users(page_size=10000): print 'Starting {}...'.format(sys._getframe().f_code.co_name) user_count = 0 fk_count = 0 cache_hits = 0 cache_misses = 0 start = datetime.now() total = MODMUser.find(build_query(fk_user_fields, MODMUser)).count() while user_count < total: with transaction.atomic(): for modm_user in MODMUser.find( build_query(fk_user_fields, MODMUser)).sort( '-date_registered')[user_count:user_count + page_size]: django_user = User.objects.get(_guid__guid=modm_user._id) for fk_user_field in fk_user_fields: value = getattr(modm_user, fk_user_field, None) if value is not None: if isinstance(value, basestring): # value is a guid, try the cache table for the pk if value in modm_to_django: setattr(django_user, '{}_id'.format(fk_user_field), modm_to_django[value]) cache_hits += 1 else: # it's not in the cache, do the query user_id = User.objects.get( _guid__guid=value).pk setattr(django_user, '{}_id'.format(fk_user_field), user_id) # save for later modm_to_django[value] = user_id cache_misses += 1 elif isinstance(value, MODMUser): # value is a user object, try the cache table for the pk if value._id in modm_to_django: setattr(django_user, '{}_id'.format(fk_user_field), modm_to_django[value._id]) cache_hits += 1 else: # it's not in the cache, do the query user_id = User.objects.get( _guid__guid=value._id).pk setattr(django_user, '{}_id'.format(fk_user_field), user_id) # save for later modm_to_django[value._id] = user_id cache_misses += 1 else: # that's odd. print '\a' print '\a' print '\a' print '\a' print '\a' print '\a' import bpdb bpdb.set_trace() fk_count += 1 django_user.save() user_count += 1 if user_count % page_size == 0 or user_count == total: print 'Through {} users and {} foreign keys'.format( user_count, fk_count) print 'Cache: Hits {} Misses {}'.format( cache_hits, cache_misses) print 'Done with {} in {} seconds...'.format( sys._getframe().f_code.co_name, (datetime.now() - start).total_seconds())
def migrate_users():
    """Call ``update_search()`` on each registered, confirmed user.

    Selects users with ``is_registered`` equal to ``True`` and a
    ``date_confirmed`` that is not ``None``.
    """
    registered_and_confirmed = (
        Q('is_registered', 'eq', True) & Q('date_confirmed', 'ne', None)
    )
    matching_users = User.find(registered_and_confirmed)
    for user in matching_users:
        user.update_search()
def migrate_users():
    """Call ``update_search()`` on each registered, confirmed user.

    Selects users with ``is_registered`` equal to ``True`` and a
    ``date_confirmed`` that is not ``None``.
    """
    registered_and_confirmed = (
        Q('is_registered', 'eq', True) & Q('date_confirmed', 'ne', None)
    )
    matching_users = User.find(registered_and_confirmed)
    for user in matching_users:
        user.update_search()
def set_user_foreign_keys_on_users(page_size=10000): print 'Starting {}...'.format(sys._getframe().f_code.co_name) user_count = 0 fk_count = 0 cache_hits = 0 cache_misses = 0 start = datetime.now() total = MODMUser.find(build_query(fk_user_fields, MODMUser)).count() while user_count < total: with transaction.atomic(): for modm_user in MODMUser.find(build_query( fk_user_fields, MODMUser)).sort('-date_registered')[ user_count:user_count + page_size]: django_user = User.objects.get(_guid__guid=modm_user._id) for fk_user_field in fk_user_fields: value = getattr(modm_user, fk_user_field, None) if value is not None: if isinstance(value, basestring): # value is a guid, try the cache table for the pk if value in modm_to_django: setattr(django_user, '{}_id'.format(fk_user_field), modm_to_django[value]) cache_hits += 1 else: # it's not in the cache, do the query user_id = User.objects.get( _guid__guid=value).pk setattr(django_user, '{}_id'.format(fk_user_field), user_id) # save for later modm_to_django[value] = user_id cache_misses += 1 elif isinstance(value, MODMUser): # value is a user object, try the cache table for the pk if value._id in modm_to_django: setattr(django_user, '{}_id'.format(fk_user_field), modm_to_django[value._id]) cache_hits += 1 else: # it's not in the cache, do the query user_id = User.objects.get( _guid__guid=value._id).pk setattr(django_user, '{}_id'.format(fk_user_field), user_id) # save for later modm_to_django[value._id] = user_id cache_misses += 1 else: # that's odd. print '\a' print '\a' print '\a' print '\a' print '\a' print '\a' import bpdb bpdb.set_trace() fk_count += 1 django_user.save() user_count += 1 if user_count % page_size == 0 or user_count == total: print 'Through {} users and {} foreign keys'.format( user_count, fk_count) print 'Cache: Hits {} Misses {}'.format(cache_hits, cache_misses) print 'Done with {} in {} seconds...'.format( sys._getframe().f_code.co_name, (datetime.now() - start).total_seconds())