def set_node_many_to_many_on_users(page_size=5000):
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    user_count = 0
    m2m_count = 0
    start = datetime.now()
    total = MODMUser.find(build_query(m2m_node_fields, MODMUser)).count()
    print '{} Users'.format(total)
    while user_count < total:
        with transaction.atomic():
            for modm_user in MODMUser.find(build_query(
                    m2m_node_fields, MODMUser)).sort('-date_registered')[
                        user_count:page_size + user_count]:
                django_user = User.objects.get(
                    pk=modm_to_django[modm_user._id])
                for m2m_node_field in m2m_node_fields:
                    try:
                        attr = getattr(django_user, m2m_node_field)
                    except AttributeError as ex:
                        # node field doesn't exist on user
                        pass
                    else:
                        # node field exists, do the stuff
                        django_pks = []
                        for modm_m2m_value in getattr(modm_user,
                                                      m2m_node_field, []):
                            if isinstance(modm_m2m_value, MODMNode):
                                django_pks.append(modm_to_django[
                                    modm_m2m_value._id])
                            elif isinstance(modm_m2m_value, basestring):
                                django_pks.append(modm_to_django[
                                    modm_m2m_value])
                            elif isinstance(modm_m2m_value, Pointer):
                                django_pks.append(modm_to_django[
                                    modm_m2m_value.node._id])
                            else:
                                # wth
                                print '\a'  # bells!
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                import bpdb
                                bpdb.set_trace()

                        if len(django_pks) > 0:
                            attr.add(*django_pks)
                        m2m_count += len(django_pks)
                user_count += 1
                if user_count % page_size == 0 or user_count == total:
                    print 'Through {} users and {} m2m'.format(user_count,
                                                               m2m_count)
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
def set_node_many_to_many_on_users(page_size=5000):
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    user_count = 0
    m2m_count = 0
    start = datetime.now()
    total = MODMUser.find(build_query(m2m_node_fields, MODMUser)).count()
    print '{} Users'.format(total)
    while user_count < total:
        with transaction.atomic():
            for modm_user in MODMUser.find(
                    build_query(m2m_node_fields, MODMUser)).sort(
                        '-date_registered')[user_count:page_size + user_count]:
                django_user = User.objects.get(
                    pk=modm_to_django[modm_user._id])
                for m2m_node_field in m2m_node_fields:
                    try:
                        attr = getattr(django_user, m2m_node_field)
                    except AttributeError as ex:
                        # node field doesn't exist on user
                        pass
                    else:
                        # node field exists, do the stuff
                        django_pks = []
                        for modm_m2m_value in getattr(modm_user,
                                                      m2m_node_field, []):
                            if isinstance(modm_m2m_value, MODMNode):
                                django_pks.append(
                                    modm_to_django[modm_m2m_value._id])
                            elif isinstance(modm_m2m_value, basestring):
                                django_pks.append(
                                    modm_to_django[modm_m2m_value])
                            elif isinstance(modm_m2m_value, Pointer):
                                django_pks.append(
                                    modm_to_django[modm_m2m_value.node._id])
                            else:
                                # wth
                                print '\a'  # bells!
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                import bpdb
                                bpdb.set_trace()

                        if len(django_pks) > 0:
                            attr.add(*django_pks)
                        m2m_count += len(django_pks)
                user_count += 1
                if user_count % page_size == 0 or user_count == total:
                    print 'Through {} users and {} m2m'.format(
                        user_count, m2m_count)
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
Exemple #3
0
def main():
    total = MODMUser.find().count()
    count = 0
    page_size = 1000

    while count < total:
        modm_users = MODMUser.find()[count:count + page_size]
        for modm_user in modm_users:
            django_user = get_or_create_user(modm_user)
            count += 1
        print 'Count: {}'.format(count)
def save_bare_users(page_size=20000):
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    count = 0
    start = datetime.now()
    total = MODMUser.find().count()

    while count < total:
        with transaction.atomic():
            users = []
            for modm_user in MODMUser.find().sort(
                    '-date_registered')[count:count + page_size]:
                guid = Guid.objects.get(guid=modm_user._id)
                user_fields = dict(_guid_id=guid.pk, **modm_user.to_storage())

                cleaned_user_fields = {
                    key: user_fields[key]
                    for key in user_fields if key not in user_key_blacklist
                }

                for k, v in cleaned_user_fields.iteritems():
                    if isinstance(v, datetime):
                        cleaned_user_fields[k] = pytz.utc.localize(v)

                cleaned_user_fields = {
                    k: v
                    for k, v in cleaned_user_fields.iteritems()
                    if v is not None
                }
                users.append(User(**cleaned_user_fields))
                count += 1
                if count % page_size == 0 or count == total:
                    then = datetime.now()
                    print 'Saving users {} through {}...'.format(
                        count - page_size, count)
                    woot = User.objects.bulk_create(users)
                    for wit in woot:
                        modm_to_django[wit._guid.guid] = wit.pk
                    now = datetime.now()
                    print 'Done with {} users in {} seconds...'.format(
                        len(woot), (now - then).total_seconds())
                    users = None
                    woot = None
                    guid = None
                    user_fields = None
                    cleaned_user_fields = None
                    trash = gc.collect()
                    print 'Took out {} trashes'.format(trash)

    print 'Modm Users: {}'.format(total)
    print 'django Users: {}'.format(User.objects.all().count())
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
def save_bare_users(page_size=20000):
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    count = 0
    start = datetime.now()
    total = MODMUser.find().count()

    while count < total:
        with transaction.atomic():
            users = []
            for modm_user in MODMUser.find().sort('-date_registered')[
                    count:count + page_size]:
                guid = Guid.objects.get(guid=modm_user._id)
                user_fields = dict(_guid_id=guid.pk, **modm_user.to_storage())

                cleaned_user_fields = {key: user_fields[key]
                                       for key in user_fields
                                       if key not in user_key_blacklist}

                for k, v in cleaned_user_fields.iteritems():
                    if isinstance(v, datetime):
                        cleaned_user_fields[k] = pytz.utc.localize(v)

                cleaned_user_fields = {k: v
                                       for k, v in
                                       cleaned_user_fields.iteritems()
                                       if v is not None}
                users.append(User(**cleaned_user_fields))
                count += 1
                if count % page_size == 0 or count == total:
                    then = datetime.now()
                    print 'Saving users {} through {}...'.format(
                        count - page_size, count)
                    woot = User.objects.bulk_create(users)
                    for wit in woot:
                        modm_to_django[wit._guid.guid] = wit.pk
                    now = datetime.now()
                    print 'Done with {} users in {} seconds...'.format(
                        len(woot), (now - then).total_seconds())
                    users = None
                    woot = None
                    guid = None
                    user_fields = None
                    cleaned_user_fields = None
                    trash = gc.collect()
                    print 'Took out {} trashes'.format(trash)

    print 'Modm Users: {}'.format(total)
    print 'django Users: {}'.format(User.objects.all().count())
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
def save_bare_system_tags(page_size=10000):
    print 'Starting save_bare_system_tags...'
    start = datetime.now()

    things = list(MODMNode.find(MQ(
        'system_tags', 'ne', [])).sort('-_id')) + list(
            MODMUser.find(MQ('system_tags', 'ne', [])).sort('-_id'))

    system_tag_ids = []
    for thing in things:
        for system_tag in thing.system_tags:
            system_tag_ids.append(system_tag)

    unique_system_tag_ids = set(system_tag_ids)

    total = len(unique_system_tag_ids)

    system_tags = []
    for system_tag_id in unique_system_tag_ids:
        system_tags.append(
            Tag(_id=system_tag_id, lower=system_tag_id.lower(), system=True))

    woot = Tag.objects.bulk_create(system_tags)

    print 'MODM System Tags: {}'.format(total)
    print 'django system tags: {}'.format(
        Tag.objects.filter(system=True).count())
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
def save_bare_system_tags(page_size=10000):
    print 'Starting save_bare_system_tags...'
    start = datetime.now()

    things = list(MODMNode.find(MQ('system_tags', 'ne', [])).sort(
        '-_id')) + list(MODMUser.find(MQ('system_tags', 'ne', [])).sort(
            '-_id'))

    system_tag_ids = []
    for thing in things:
        for system_tag in thing.system_tags:
            system_tag_ids.append(system_tag)

    unique_system_tag_ids = set(system_tag_ids)

    total = len(unique_system_tag_ids)

    system_tags = []
    for system_tag_id in unique_system_tag_ids:
        system_tags.append(Tag(_id=system_tag_id,
                               lower=system_tag_id.lower(),
                               system=True))

    woot = Tag.objects.bulk_create(system_tags)

    print 'MODM System Tags: {}'.format(total)
    print 'django system tags: {}'.format(Tag.objects.filter(system=
                                                             True).count())
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
Exemple #8
0
def migrate_users(index):
    """Send every active user to ``search.update_user`` for ``index``.

    Iterates ``User.find()``, updates users whose ``is_active`` is truthy,
    and logs how many users were iterated and how many were migrated.
    """
    logger.info("Migrating users to index: {}".format(index))
    migrated = 0
    iterated = 0
    for iterated, user in enumerate(User.find(), 1):
        if not user.is_active:
            continue
        search.update_user(user, index=index)
        migrated += 1

    summary = 'Users iterated: {0}\nUsers migrated: {1}'
    logger.info(summary.format(iterated, migrated))
Exemple #9
0
def migrate_users(index):
    """Update the search entry in ``index`` for each active user.

    Every user from ``User.find()`` is counted; those with a truthy
    ``is_active`` are passed to ``search.update_user``. Both totals are
    logged at the end.
    """
    logger.info("Migrating users to index: {}".format(index))
    seen_total = 0
    active_total = 0
    for user in User.find():
        seen_total += 1
        if user.is_active:
            search.update_user(user, index=index)
            active_total += 1

    logger.info(
        'Users iterated: {0}\nUsers migrated: {1}'.format(
            seen_total, active_total))
Exemple #10
0
def find_inactive_users_with_no_inactivity_email_sent_or_queued():
    """Return active users overdue for a no-login email not yet queued.

    A user is overdue when ``date_last_login`` is older than
    ``settings.NO_LOGIN_WAIT_TIME``, or older than
    ``settings.NO_LOGIN_OSF4M_WAIT_TIME`` when ``system_tags`` matches
    ``'osf4m'``. Users that already have a ``QueuedMail`` of type
    ``mails.NO_LOGIN_TYPE`` are excluded, as are users whose ``is_active``
    is falsy.

    :return: list of ``User`` objects loaded by ``_id``, in no particular
        order.
    """
    # One reference time for both branches of the query.
    now = datetime.utcnow()
    # The field name comes first in Q(field, operator, value); the original
    # had 'osf4m' and 'system_tags' swapped, so the OSF4M branch could never
    # match.
    # NOTE(review): assumes 'eq'/'ne' against the ``system_tags`` list field
    # tests membership, as in MongoDB array matching - confirm.
    inactive_users = User.find(
        (Q('date_last_login', 'lt', now - settings.NO_LOGIN_WAIT_TIME) & Q('system_tags', 'ne', 'osf4m')) |
        (Q('date_last_login', 'lt', now - settings.NO_LOGIN_OSF4M_WAIT_TIME) & Q('system_tags', 'eq', 'osf4m'))
    )
    inactive_emails = mails.QueuedMail.find(Q('email_type', 'eq', mails.NO_LOGIN_TYPE))

    # Compare by _id rather than by User object: User equality fails on
    # datetime fields because of pymongo rounding.
    users_sent_id = [email.user._id for email in inactive_emails]
    inactive_ids = [user._id for user in inactive_users if user.is_active]
    users_to_send = [
        User.load(user_id)
        for user_id in (set(inactive_ids) - set(users_sent_id))
    ]
    return users_to_send
Exemple #11
0
def find_inactive_users_with_no_inactivity_email_sent_or_queued():
    """Return active, long-idle users without a queued no-login email.

    Users qualify when ``date_last_login`` is older than
    ``settings.NO_LOGIN_WAIT_TIME`` and ``tags__name`` is not ``'osf4m'``,
    or older than ``settings.NO_LOGIN_OSF4M_WAIT_TIME`` and ``tags__name``
    is ``'osf4m'``. Users referenced by a ``QueuedMail`` of type
    ``mails.NO_LOGIN_TYPE`` and users whose ``is_active`` is falsy are
    left out.

    :return: list of ``User`` objects loaded by ``_id``.
    """
    regular_overdue = (
        Q('date_last_login', 'lt', timezone.now() - settings.NO_LOGIN_WAIT_TIME) &
        Q('tags__name', 'ne', 'osf4m')
    )
    osf4m_overdue = (
        Q('date_last_login', 'lt', timezone.now() - settings.NO_LOGIN_OSF4M_WAIT_TIME) &
        Q('tags__name', 'eq', 'osf4m')
    )
    inactive_users = User.find(regular_overdue | osf4m_overdue)
    inactive_emails = mails.QueuedMail.find(Q('email_type', 'eq', mails.NO_LOGIN_TYPE))

    # Work with _id values instead of User objects: comparing users directly
    # fails on datetime fields because of pymongo rounding.
    already_sent = set()
    for email in inactive_emails:
        already_sent.add(email.user._id)

    candidates = set()
    for user in inactive_users:
        if user.is_active:
            candidates.add(user._id)

    users_to_send = []
    for user_id in candidates - already_sent:
        users_to_send.append(User.load(user_id))
    return users_to_send
Exemple #12
0
def set_user_foreign_keys_on_users(page_size=10000):
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    user_count = 0
    fk_count = 0
    cache_hits = 0
    cache_misses = 0
    start = datetime.now()
    total = MODMUser.find(build_query(fk_user_fields, MODMUser)).count()

    while user_count < total:
        with transaction.atomic():
            for modm_user in MODMUser.find(
                    build_query(fk_user_fields, MODMUser)).sort(
                        '-date_registered')[user_count:user_count + page_size]:
                django_user = User.objects.get(_guid__guid=modm_user._id)
                for fk_user_field in fk_user_fields:
                    value = getattr(modm_user, fk_user_field, None)
                    if value is not None:
                        if isinstance(value, basestring):
                            # value is a guid, try the cache table for the pk
                            if value in modm_to_django:
                                setattr(django_user,
                                        '{}_id'.format(fk_user_field),
                                        modm_to_django[value])
                                cache_hits += 1
                            else:
                                # it's not in the cache, do the query
                                user_id = User.objects.get(
                                    _guid__guid=value).pk
                                setattr(django_user,
                                        '{}_id'.format(fk_user_field), user_id)
                                # save for later
                                modm_to_django[value] = user_id
                                cache_misses += 1
                        elif isinstance(value, MODMUser):
                            # value is a user object, try the cache table for the pk
                            if value._id in modm_to_django:
                                setattr(django_user,
                                        '{}_id'.format(fk_user_field),
                                        modm_to_django[value._id])
                                cache_hits += 1
                            else:
                                # it's not in the cache, do the query
                                user_id = User.objects.get(
                                    _guid__guid=value._id).pk
                                setattr(django_user,
                                        '{}_id'.format(fk_user_field), user_id)
                                # save for later
                                modm_to_django[value._id] = user_id
                                cache_misses += 1
                        else:
                            # that's odd.
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            import bpdb
                            bpdb.set_trace()
                        fk_count += 1
                django_user.save()
                user_count += 1
                if user_count % page_size == 0 or user_count == total:
                    print 'Through {} users and {} foreign keys'.format(
                        user_count, fk_count)
                    print 'Cache: Hits {} Misses {}'.format(
                        cache_hits, cache_misses)
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
Exemple #13
0
def migrate_users():
    """Call ``update_search()`` on each registered user with a confirmation date.

    Selects users where ``is_registered`` equals ``True`` and
    ``date_confirmed`` is not ``None``.
    """
    registered = Q('is_registered', 'eq', True)
    confirmed = Q('date_confirmed', 'ne', None)
    for user in User.find(registered & confirmed):
        user.update_search()
Exemple #14
0
def migrate_users():
    """Refresh the search entry of every registered, confirmed user.

    The query keeps users whose ``is_registered`` equals ``True`` and whose
    ``date_confirmed`` is not ``None``; ``update_search()`` is called on
    each of them.
    """
    confirmed_registered_users = User.find(
        Q('is_registered', 'eq', True) & Q('date_confirmed', 'ne', None))
    for confirmed_user in confirmed_registered_users:
        confirmed_user.update_search()
Exemple #15
0
def set_user_foreign_keys_on_users(page_size=10000):
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    user_count = 0
    fk_count = 0
    cache_hits = 0
    cache_misses = 0
    start = datetime.now()
    total = MODMUser.find(build_query(fk_user_fields, MODMUser)).count()

    while user_count < total:
        with transaction.atomic():
            for modm_user in MODMUser.find(build_query(
                    fk_user_fields, MODMUser)).sort('-date_registered')[
                        user_count:user_count + page_size]:
                django_user = User.objects.get(_guid__guid=modm_user._id)
                for fk_user_field in fk_user_fields:
                    value = getattr(modm_user, fk_user_field, None)
                    if value is not None:
                        if isinstance(value, basestring):
                            # value is a guid, try the cache table for the pk
                            if value in modm_to_django:
                                setattr(django_user,
                                        '{}_id'.format(fk_user_field),
                                        modm_to_django[value])
                                cache_hits += 1
                            else:
                                # it's not in the cache, do the query
                                user_id = User.objects.get(
                                    _guid__guid=value).pk
                                setattr(django_user,
                                        '{}_id'.format(fk_user_field), user_id)
                                # save for later
                                modm_to_django[value] = user_id
                                cache_misses += 1
                        elif isinstance(value, MODMUser):
                            # value is a user object, try the cache table for the pk
                            if value._id in modm_to_django:
                                setattr(django_user,
                                        '{}_id'.format(fk_user_field),
                                        modm_to_django[value._id])
                                cache_hits += 1
                            else:
                                # it's not in the cache, do the query
                                user_id = User.objects.get(
                                    _guid__guid=value._id).pk
                                setattr(django_user,
                                        '{}_id'.format(fk_user_field), user_id)
                                # save for later
                                modm_to_django[value._id] = user_id
                                cache_misses += 1
                        else:
                            # that's odd.
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            import bpdb
                            bpdb.set_trace()
                        fk_count += 1
                django_user.save()
                user_count += 1
                if user_count % page_size == 0 or user_count == total:
                    print 'Through {} users and {} foreign keys'.format(
                        user_count, fk_count)
                    print 'Cache: Hits {} Misses {}'.format(cache_hits,
                                                            cache_misses)
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())