def migrate(dry_run=True):
    """Rename primary-institution logs to the affiliated-institution actions
    and fold each node's primary institution into its affiliated list.

    Raises RuntimeError when ``dry_run`` is True so an enclosing transaction
    rolls every change back.
    """
    for entry in NodeLog.find(Q('action', 'eq', PRIMARY_INSTITUTION_CHANGED)):
        logger.info('Log with id <{}> being updated for affiliation added'.format(entry._id))
        entry.action = NodeLog.AFFILIATED_INSTITUTION_ADDED
        entry.save()

    for entry in NodeLog.find(Q('action', 'eq', PRIMARY_INSTITUTION_REMOVED)):
        logger.info('Log with id <{}> being updated for affiliation removed'.format(entry._id))
        entry.action = NodeLog.AFFILIATED_INSTITUTION_REMOVED
        entry.save()

    for node in Node.find(Q('primary_institution', 'ne', None)):
        logger.info('Node with id <{}> and title <{}> being updated'.format(node._id, node.title))
        primary = node.primary_institution
        if primary not in node.affiliated_institutions:
            node.affiliated_institutions.append(primary)
        node.primary_institution = None
        node.save()

    if dry_run:
        raise RuntimeError('Dry run, transaction rolled back.')
def migrate(dry_run=True):
    """Backfill params['preprint'] on preprint NodeLogs that are missing it."""
    query = (
        Q("action", "in", [NodeLog.PREPRINT_FILE_UPDATED, NodeLog.PREPRINT_INITIATED])
        & Q("params.preprint", "exists", False)
    )
    targets = list(NodeLog.find(query))
    logger.info("Preparing to migrate {} NodeLogs".format(len(targets)))
    migrated = 0
    for target in targets:
        node_id = target.params.get("node")
        try:
            preprint = PreprintService.find_one(Q("node", "eq", node_id))
        except NoResultsFound:
            # No preprint for that node: leave the log untouched.
            logger.error("Skipping {}, preprint not found for node: {}".format(target._id, node_id))
            continue
        logger.info("Migrating log - {} - to add params.preprint: {}, ".format(target._id, preprint._id))
        target.params["preprint"] = preprint._id
        target.save()
        migrated += 1
    logger.info("Migrated {} logs".format(migrated))
def do_migration(records, dry=False):
    """Re-attach logs that were connected to each node before it was
    forked/registered, filtering out logs that belong to unrelated nodes.

    :param records: iterable of forked/registered nodes to repair
    :param dry: when True, compute and report but do not save

    Bug fix: the original called ``logs.remove(log)`` while iterating
    ``for log in logs``, which skips the element following each removal and
    can leave unrelated logs attached. We build a filtered list instead.
    """
    for node in records:
        logs = list(NodeLog.find(Q('was_connected_to', 'contains', node)))
        existing_logs = node.logs
        kept_logs = []
        for log in logs:
            if log.node__logged:
                log_node = log.node__logged[0]
                # if the log_node is not contained in the node parent list then it doesn't belong to this node
                if log_node not in get_all_parents(node):
                    logger.info(
                        'Excluding log {} from list because it is not associated with node {}'
                        .format(log, node))
                    continue
            kept_logs.append(log)
        with TokuTransaction():
            node.logs = kept_logs + existing_logs
            node.system_tags.append(SYSTEM_TAG)
            node_type = 'registration' if node.is_registration else 'fork'
            logger.info('Adding {} logs to {} {}'.format(
                len(kept_logs), node_type, node))
            if not dry:
                try:
                    node.save()
                except Exception as err:
                    # Best-effort: report the failure and continue with the next node.
                    logger.error(
                        'Could not update logs for node {} due to error'.
                        format(node._id))
                    logger.exception(err)
                    logger.error('Skipping...')
def do_migration(records, dry=False):
    """Restore logs previously connected to each node, excluding logs that
    do not belong to the node's parent chain.

    :param records: iterable of forked/registered nodes to repair
    :param dry: when True, log intended changes but do not save

    Bug fix: removing items from ``logs`` while iterating it skipped the
    element after every removal, so some unrelated logs survived the filter.
    A separate "kept" list avoids mutate-during-iteration entirely.
    """
    for node in records:
        logs = list(NodeLog.find(Q('was_connected_to', 'contains', node)))
        existing_logs = node.logs
        kept = []
        for log in logs:
            if log.node__logged:
                log_node = log.node__logged[0]
                # if the log_node is not contained in the node parent list then it doesn't belong to this node
                if log_node not in get_all_parents(node):
                    logger.info('Excluding log {} from list because it is not associated with node {}'.format(log, node))
                    continue
            kept.append(log)
        with TokuTransaction():
            node.logs = kept + existing_logs
            node.system_tags.append(SYSTEM_TAG)
            node_type = 'registration' if node.is_registration else 'fork'
            logger.info('Adding {} logs to {} {}'.format(len(kept), node_type, node))
            if not dry:
                try:
                    node.save()
                except Exception as err:
                    # Report and move on; one bad node should not abort the run.
                    logger.error('Could not update logs for node {} due to error'.format(node._id))
                    logger.exception(err)
                    logger.error('Skipping...')
def migrate(dry_run=True):
    """Attach the owning preprint id to preprint NodeLogs lacking params['preprint']."""
    action_q = Q('action', 'in', [NodeLog.PREPRINT_FILE_UPDATED, NodeLog.PREPRINT_INITIATED])
    missing_q = Q('params.preprint', 'exists', False)
    logs = list(NodeLog.find(action_q & missing_q))
    logger.info('Preparing to migrate {} NodeLogs'.format(len(logs)))
    migrated_count = 0
    for log in logs:
        nid = log.params.get('node')
        try:
            service = PreprintService.find_one(Q('node', 'eq', nid))
        except NoResultsFound:
            logger.error('Skipping {}, preprint not found for node: {}'.format(log._id, nid))
            continue
        logger.info(
            'Migrating log - {} - to add params.preprint: {}, '.format(log._id, service._id)
        )
        log.params['preprint'] = service._id
        log.save()
        migrated_count += 1
    logger.info('Migrated {} logs'.format(migrated_count))
def user_last_log(user, query=None):
    """Return the date of the user's most recent NodeLog, optionally
    restricted by an additional ``query`` clause."""
    user_clause = Q('user', 'eq', user._id)
    combined = (query & user_clause) if query else user_clause
    results = NodeLog.find(combined)
    return results[results.count() - 1].date
def find_invalid_logs():
    """Yield WIKI_DELETED logs whose ObjectId creation time postdates the
    log's recorded date (i.e. the stored date looks wrong)."""
    wiki_deleted = NodeLog.find(Q('action', 'eq', NodeLog.WIKI_DELETED))
    for entry in wiki_deleted:
        # ObjectIds embed their creation timestamp; normalize it to naive UTC.
        created = ObjectId(entry._id).generation_time
        created = created.replace(tzinfo=None) - created.utcoffset()
        if created > entry.date:
            yield entry
def count_user_logs(user):
    """Count the user's logs, discounting the bookmark-collection creation
    log when the count sits exactly at LOG_THRESHOLD."""
    user_logs = NodeLog.find(Q('user', 'eq', user._id))
    total = user_logs.count()
    if total == LOG_THRESHOLD:
        first = user_logs[0]
        if first.action == 'project_created' and first.node.is_bookmark_collection:
            total -= 1
    return total
def user_last_log(user, query=None):
    """Date of the latest NodeLog by ``user``; ``query`` further narrows the search."""
    base = Q('user', 'eq', user._id)
    if query:
        query &= base
    else:
        query = base
    matches = NodeLog.find(query)
    last_index = matches.count() - 1
    return matches[last_index].date
def count_user_logs(user):
    """Log count for ``user``; a bookmark-collection creation log is not
    counted when the total equals LOG_THRESHOLD."""
    found = NodeLog.find(Q('user', 'eq', user._id))
    n = found.count()
    if n != LOG_THRESHOLD:
        return n
    head = found[0]
    is_bookmark_creation = (
        head.action == 'project_created' and head.node.is_bookmark_collection
    )
    return n - 1 if is_bookmark_creation else n
def find_invalid_logs():
    """Generator over WIKI_DELETED logs dated earlier than their own ObjectId."""
    for record in NodeLog.find(Q('action', 'eq', NodeLog.WIKI_DELETED)):
        # Derive a naive UTC datetime from the ObjectId's embedded timestamp.
        stamp = ObjectId(record._id).generation_time
        naive_utc = stamp.replace(tzinfo=None) - stamp.utcoffset()
        if naive_utc > record.date:
            yield record
def count_user_logs(user, query=None):
    """Count the user's logs (optionally pre-filtered by ``query``),
    excluding the dashboard's project_created log if it is first."""
    user_clause = Q('user', 'eq', user._id)
    query = (query & user_clause) if query else user_clause
    logs = NodeLog.find(query)
    total = logs.count()
    if total > 0:
        first = logs[0]
        if first.action == 'project_created' and first.node.is_dashboard:
            total -= 1
    return total
def count_user_logs(user, query=None):
    """User log count minus the dashboard-creation log, when present first."""
    if query:
        query &= Q('user', 'eq', user._id)
    else:
        query = Q('user', 'eq', user._id)
    matched = NodeLog.find(query)
    n = matched.count()
    if not n:
        return n
    oldest = matched[0]
    if oldest.action == 'project_created' and oldest.node.is_dashboard:
        n -= 1
    return n
def main(dry):
    """Annotate preprint-related NodeLogs with preprint (and provider) info.

    :param dry: when True, log intended updates without saving

    Improvements: the two nearly identical loops are deduplicated into
    ``_annotate_preprint_logs``, and the ``try`` body is reduced to the one
    call that can raise ``NoResultsFound``.
    """
    if dry:
        logging.warn('DRY mode running')
    now = datetime.utcnow()
    # PREPRINT_INITIATED logs also record the provider ('service') name.
    _annotate_preprint_logs(NodeLog.PREPRINT_INITIATED, now, dry, include_service=True)
    _annotate_preprint_logs(NodeLog.PREPRINT_FILE_UPDATED, now, dry, include_service=False)


def _annotate_preprint_logs(action, now, dry, include_service):
    """Add preprint id (and optionally provider name) to logs of ``action``
    dated before ``now``. Logs whose node has no preprint are skipped."""
    for log in NodeLog.find(Q('action', 'eq', action) & Q('date', 'lt', now)):
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
        except NoResultsFound:
            # Node never had a preprint: nothing to annotate.
            continue
        params = {'preprint': {'id': preprint._id}}
        if include_service:
            params['service'] = {'name': preprint.provider.name}
        log.params.update(params)
        logging.info(
            'Updating log {} from node {}, with preprint id: {}'.format(
                log._id, log.node.title, preprint._id))
        if not dry:
            log.save()
def migrate(dry_run=True):
    """Convert primary-institution logs and node fields to the
    affiliated-institution model; dry runs raise to force rollback."""
    changed = NodeLog.find(Q('action', 'eq', PRIMARY_INSTITUTION_CHANGED))
    for log in changed:
        logger.info('Log with id <{}> being updated for affiliation added'.format(log._id))
        log.action = NodeLog.AFFILIATED_INSTITUTION_ADDED
        log.save()
    dropped = NodeLog.find(Q('action', 'eq', PRIMARY_INSTITUTION_REMOVED))
    for log in dropped:
        logger.info('Log with id <{}> being updated for affiliation removed'.format(log._id))
        log.action = NodeLog.AFFILIATED_INSTITUTION_REMOVED
        log.save()
    affected = Node.find(Q('primary_institution', 'ne', None))
    for target in affected:
        logger.info('Node with id <{}> and title <{}> being updated'.format(target._id, target.title))
        institution = target.primary_institution
        if institution not in target.affiliated_institutions:
            target.affiliated_institutions.append(institution)
        target.primary_institution = None
        target.save()
    if dry_run:
        raise RuntimeError('Dry run, transaction rolled back.')
def get_targets():
    """
    These logs are potentially missing params['registration'] fields.
    Params['node'] and original_node fields may incorrectly be pointing to
    the registration instead of the node.
    """
    query = Q('action', 'eq', 'registration_cancelled')
    query |= Q('action', 'eq', 'retraction_approved')
    query |= Q('action', 'eq', 'retraction_cancelled')
    query |= Q('action', 'eq', 'embargo_approved')
    query |= Q('action', 'eq', 'embargo_cancelled')
    query |= Q('action', 'eq', 'embargo_terminated')
    return NodeLog.find(query)
def get_or_create_node(node_id, sqlite_db):
    """Gets an OSF node from the sqlite cache. If not found, pulls the node
    info from mongo and saves it.

    :param node_id: OSF node id (e.g. 'mst3k')
    :param sqlite_db: SQLite3 database handle
    :return: node row, or None when node_id is None or the node is unknown

    Fix: the SELECT previously interpolated ``node_id`` straight into the
    SQL string; it now uses a parameterized query (SQL-injection safe and
    robust against ids containing quotes).
    """
    if node_id is None:
        return None
    cursor = sqlite_db.cursor()
    cursor.execute("SELECT * FROM nodes WHERE id=?", (node_id,))
    nodes = cursor.fetchall()
    if len(nodes) > 1:
        raise Exception("Multiple nodes found for single node ID")
    if nodes:
        return nodes[0]
    node = Node.load(node_id)
    if node is None:
        return None
    node_public_date = None
    # Latest privacy-change log decides whether the node is currently public.
    privacy_actions = NodeLog.find(
        Q('node', 'eq', node_id) &
        Q('action', 'in', [NodeLog.MADE_PUBLIC, NodeLog.MADE_PRIVATE])
    ).sort('-date')
    try:
        privacy_action = privacy_actions[0]
    except IndexError:
        # No privacy-change logs: made_public_date stays NULL.
        pass
    else:
        if privacy_action.action == NodeLog.MADE_PUBLIC:
            node_public_date = privacy_action.date.isoformat()
            # Trim microseconds to milliseconds and tag as UTC ('Z').
            node_public_date = node_public_date[:-3] + 'Z'
    cursor.execute(
        u'INSERT INTO nodes (id, title, category, made_public_date) VALUES (?, ?, ?, ?)',
        (node_id, getattr(node, 'title'), getattr(node, 'category'), node_public_date)
    )
    sqlite_db.commit()
    # Recurse once: the row now exists, so the cache hit path returns it.
    return get_or_create_node(node_id, sqlite_db)
def main(dry):
    """Annotate preprint-initiated/file-updated logs with preprint info;
    initiated logs also get the provider ('service') name."""
    if dry:
        logging.warn('DRY mode running')
    cutoff = datetime.utcnow()
    initiated = NodeLog.find(Q('action', 'eq', NodeLog.PREPRINT_INITIATED) & Q('date', 'lt', cutoff))
    for log in initiated:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            log.params.update({
                'preprint': {'id': preprint._id},
                'service': {'name': preprint.provider.name},
            })
            logging.info('Updating log {} from node {}, with preprint id: {}'.format(log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            pass
    updated = NodeLog.find(Q('action', 'eq', NodeLog.PREPRINT_FILE_UPDATED) & Q('date', 'lt', cutoff))
    for log in updated:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            log.params.update({'preprint': {'id': preprint._id}})
            logging.info('Updating log {} from node {}, with preprint id: {}'.format(log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            pass
def get_targets():
    """
    Fetches all registration-related logs except for project_registered.
    project_registered log is not included because params already correct.
    """
    query = Q('action', 'eq', 'registration_initiated')
    query |= Q('action', 'eq', 'registration_approved')
    query |= Q('action', 'eq', 'registration_cancelled')
    # On staging, there are a few inconsistencies with these. Majority of params['node'] are registrations, but a handful are nodes.
    query |= Q('action', 'eq', 'retraction_initiated')
    query |= Q('action', 'eq', 'retraction_approved')
    # params['node'] is already equal to node. Adds registration_field below. Will be slow.
    query |= Q('action', 'eq', 'retraction_cancelled')
    query |= Q('action', 'eq', 'embargo_initiated')
    query |= Q('action', 'eq', 'embargo_approved')
    query |= Q('action', 'eq', 'embargo_completed')
    query |= Q('action', 'eq', 'embargo_cancelled')
    return NodeLog.find(query)
def get_targets():
    """
    Fetches all registration-related logs except for project_registered.
    project_registered log is not included because params already correct.
    """
    actions = [
        'registration_initiated',
        'registration_approved',
        'registration_cancelled',
        # On staging, there are a few inconsistencies with these. Majority of params['node'] are registrations, but a handful are nodes.
        'retraction_initiated',
        'retraction_approved',
        # params['node'] is already equal to node. Adds registration_field below. Will be slow.
        'retraction_cancelled',
        'embargo_initiated',
        'embargo_approved',
        'embargo_completed',
        'embargo_cancelled',
    ]
    query = Q('action', 'eq', actions[0])
    for action in actions[1:]:
        query |= Q('action', 'eq', action)
    return NodeLog.find(query)
def get_targets():
    """EMBARGO_APPROVED logs that are missing a user in their params."""
    approved = Q('action', 'eq', NodeLog.EMBARGO_APPROVED)
    no_user = Q('params.user', 'eq', None)
    return NodeLog.find(approved & no_user)
def get_targets():
    """Fetch every NodeLog flagged as hidden."""
    hidden_q = Q('should_hide', 'eq', True)
    return NodeLog.find(hidden_q)
def get_targets():
    """Return the logs whose registrations we want to migrate
    (approved retractions), reporting how many were found."""
    retraction_logs = NodeLog.find(Q('action', 'eq', 'retraction_approved'))
    logger.info('Retractions found: {}'.format(len(retraction_logs)))
    return retraction_logs
def main():
    """Bulk-migrate every MODM NodeLog into a Django NodeLog row.

    Works in pages of ``page_size`` logs; each full page is bulk-created
    inside one transaction, then the in-memory batch is reset.
    """
    start = datetime.now()
    split = start  # timestamp of the last per-1000 progress report
    total = MODMNodeLog.find().count()
    count = 0
    page_size = 10000  # logs bulk-created per transaction
    blank_users = 0  # logs whose user pk could not be resolved
    blank_nodes = 0  # logs whose node pk could not be resolved
    while count < total:
        garbage = gc.collect()
        print 'Collected {} whole garbages!'.format(garbage)
        print 'Migrating {} through {}'.format(count, count + page_size)
        django_nodelogs = deque()
        nodelog_guids = deque()  # guids already queued in this batch (dedupe)
        for modm_nodelog in MODMNodeLog.find().sort('-date')[count:count + page_size]:
            if modm_nodelog._id in nodelog_guids:
                # Duplicate within the batch: skip to avoid double insert.
                print 'Nodelog with guid of {} and data of {} exists in batch'.format(
                    modm_nodelog._id, modm_nodelog.to_storage())
                continue
            else:
                nodelog_guids.append(modm_nodelog._id)
            try:
                user_pk = modm_to_django[modm_nodelog.user._id]
            except (KeyError, AttributeError) as ex:
                # user missing from the lookup table, or the log has no user
                blank_users += 1
                user_pk = None
            try:
                node_pk = modm_to_django[getattr(modm_nodelog, 'node', None)._id]
            except (KeyError, AttributeError) as ex:
                # node missing from the lookup table, or the log has no node
                blank_nodes += 1
                print 'Found blank node on {}'.format(modm_nodelog._id)
                node_pk = None
            if modm_nodelog.date is None:
                nodelog_date = None
            else:
                # treat the naive MODM date as UTC
                nodelog_date = pytz.utc.localize(modm_nodelog.date)
            django_nodelogs.append(
                NodeLog(guid=modm_nodelog._id,
                        date=nodelog_date,
                        action=modm_nodelog.action,
                        params=modm_nodelog.params,
                        should_hide=modm_nodelog.should_hide,
                        user_id=user_pk,
                        foreign_user=modm_nodelog.foreign_user or '',
                        node_id=node_pk))
            count += 1
            if count % 1000 == 0:
                print 'Through {} in {}'.format(count, (datetime.now() - split).total_seconds())
                split = datetime.now()
            if count % page_size == 0:
                # Page boundary: flush the batch inside one transaction.
                print '{} blank users; {} blank nodes'.format(
                    blank_users, blank_nodes)
                print 'Starting to migrate {} through {} which is {}'.format(
                    count - page_size, count, len(django_nodelogs))
                splat = datetime.now()
                if len(django_nodelogs) > 0:
                    with transaction.atomic():
                        NodeLog.objects.bulk_create(django_nodelogs)
                print 'Finished migrating {} through {} in {} which is {}'.format(
                    count - page_size, count,
                    (datetime.now() - splat).total_seconds(),
                    len(django_nodelogs))
                django_nodelogs = deque()
                nodelog_guids = deque()
                garbage = gc.collect()
                print 'Collected {} whole garbages!'.format(garbage)
    print '\a\a\a\a\a'
    print 'Finished migration in {}. MODM: {}, DJANGO: {}'.format(
        (datetime.now() - start).total_seconds(), total, NodeLog.objects.count())
    print 'There were {} blank users and {} blank nodes'.format(
        blank_users, blank_nodes)
def get_targets():
    """All logs marked with should_hide=True."""
    should_hide = Q('should_hide', 'eq', True)
    targets = NodeLog.find(should_hide)
    return targets
def get_targets():
    """Logs for approved retractions; their registrations are the migration targets."""
    query = Q('action', 'eq', 'retraction_approved')
    found = NodeLog.find(query)
    logger.info('Retractions found: {}'.format(len(found)))
    return found
def logs_since(user, date):
    """Return the user's NodeLogs dated strictly after ``date``."""
    by_user = Q('user', 'eq', user._id)
    after = Q('date', 'gt', date)
    return NodeLog.find(by_user & after)
def main():
    """Migrate MODM NodeLogs to Django NodeLog rows, creating any missing
    users/nodes on the fly and recording the was_connected_to m2m links.

    Processes logs in pages of ``page_size``; each page runs inside one
    transaction and is bulk-created at the page boundary.
    """
    total = MODMNodeLog.find().count()
    # total = len(modm_nodelogs)
    count = 0
    page_size = 100000  # logs handled per transaction / bulk_create
    django_nodelogs = []
    django_nodelogs_ids = []  # guids queued in the current batch (dedupe)
    django_nodelogs_was_connected_to = {}  # guid -> node pks for the m2m step
    print 'Migrating {} logs...'.format(total)
    while count < total:
        modm_nodelogs = None
        modm_nodelogs = MODMNodeLog.find().sort('-date')[count:count + page_size]
        with transaction.atomic():
            print 'Migrating {} through {} which is {}'.format(
                count, count + page_size, len(modm_nodelogs))
            for modm_nodelog in modm_nodelogs:
                # don't recreate the log if it exists
                if NodeLog.objects.filter(guid=modm_nodelog._id).exists():
                    pass
                else:
                    if modm_nodelog.user is not None:
                        # try to get the pk out of the lookup table
                        user_pk = modm_to_django.get(modm_nodelog.user._id, None)
                        # it wasn't there
                        if user_pk is None:
                            # create a new user
                            print 'Creating User {}'.format(modm_nodelog.user._id)
                            user = get_or_create_user(modm_nodelog.user)
                            user_pk = user.pk
                            # put the user in the lookup table for next time
                            modm_to_django[modm_nodelog.user._id] = user_pk
                    else:
                        # log doesn't have user
                        user_pk = None
                    # get the node (either a MODMNode instance or a node guid)
                    node_id = modm_nodelog.params.get(
                        'node', modm_nodelog.params.get('project'))
                    node_pk = None
                    if node_id is not None:
                        if isinstance(node_id, basestring):
                            # it's a guid, look it up in the table
                            node_pk = modm_to_django.get(node_id, None)
                        elif isinstance(node_id, MODMNode):
                            # it's an instance, look it up in the table
                            node_pk = modm_to_django.get(node_id._id, None)
                        if node_pk is None:
                            print 'Creating Node {}'.format(node_id)
                            # it wasn't in the table
                            if isinstance(node_id, basestring):
                                # it's a guid, get an instance and create a PG version
                                modm_node = MODMNode.load(node_id)
                                django_node = get_or_create_node(modm_node)
                                if django_node is None:
                                    print 'Node {} does not exist.'.format(
                                        node_id)
                                    continue
                                node_pk = get_or_create_node(modm_node).pk
                                # put it in the table for later
                                modm_to_django[modm_node._id] = node_pk
                            elif isinstance(node_id, MODMNode):
                                # it's an instance, create a PG version
                                node_pk = get_or_create_node(node_id).pk
                                # put it in the table for later
                                modm_to_django[node_id._id] = node_pk
                        if node_pk is not None:
                            # Resolve every formerly-connected node to its pk.
                            was_connected_to = []
                            for wct in modm_nodelog.was_connected_to:
                                wct_pk = modm_to_django.get(wct._id, None)
                                if wct_pk is None:
                                    wct_pk = get_or_create_node(wct).pk
                                    modm_to_django[wct._id] = wct_pk
                                was_connected_to.append(wct_pk)
                            if modm_nodelog.date is None:
                                nodelog_date = None
                            else:
                                # treat the naive MODM date as UTC
                                nodelog_date = pytz.utc.localize(modm_nodelog.date)
                            if modm_nodelog._id not in django_nodelogs_ids:
                                django_nodelogs.append(NodeLog(
                                    guid=modm_nodelog._id,
                                    date=nodelog_date,
                                    action=modm_nodelog.action,
                                    params=modm_nodelog.params,
                                    should_hide=modm_nodelog.should_hide,
                                    user_id=user_pk,
                                    foreign_user=modm_nodelog.foreign_user or '',
                                    node_id=node_pk))
                                django_nodelogs_was_connected_to[
                                    modm_nodelog._id] = was_connected_to
                                django_nodelogs_ids.append(modm_nodelog._id)
                            else:
                                print 'NodeLog with id {} and data {} was already in the bulk_create'.format(
                                    modm_nodelog._id, modm_nodelog.to_storage())
                        else:
                            print 'Node {} is None on NodeLog {}...'.format(
                                node_id, modm_nodelog._id)
                count += 1
                if count % (page_size / 50) == 0:
                    print 'Through {}'.format(count)
                if count % page_size == 0:
                    # Page boundary: flush the batch and attach m2m links.
                    print 'Starting to migrate {} through {} which should be {}'.format(
                        count - page_size, count, len(django_nodelogs))
                    if len(django_nodelogs) > 0:
                        NodeLog.objects.bulk_create(django_nodelogs)
                    print 'Finished migrating {} through {} which should be {}'.format(
                        count - page_size, count, len(django_nodelogs))
                    print 'Adding m2m values'
                    for django_nodelog in django_nodelogs:
                        nl = NodeLog.objects.get(guid=django_nodelog.guid)
                        nl.was_connected_to.add(
                            *django_nodelogs_was_connected_to[
                                django_nodelog.guid])
                    print 'Finished adding m2m values'
                    django_nodelogs = []
                    django_nodelogs_was_connected_to = {}
                    garbage = gc.collect()
                    print 'Collected {} garbages!'.format(garbage)
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print 'Finished migration. MODM: {}, DJANGO: {}'.format(
        total, NodeLog.objects.all().count())
def count_user_logs(user, query=None):
    """Count NodeLogs for ``user``, optionally narrowed by ``query``."""
    user_q = Q('user', 'eq', user._id)
    combined = (query & user_q) if query else user_q
    return NodeLog.find(combined).count()
def main():
    """Migrate MODM NodeLogs to Django, resolving (or lazily creating) the
    related users and nodes, and tracking was_connected_to links for the
    post-bulk m2m pass. Runs one transaction per ``page_size`` page.
    """
    total = MODMNodeLog.find().count()
    # total = len(modm_nodelogs)
    count = 0
    page_size = 100000  # logs handled per transaction / bulk_create
    django_nodelogs = []
    django_nodelogs_ids = []  # guids already queued (batch-level dedupe)
    django_nodelogs_was_connected_to = {}  # guid -> node pks for the m2m pass
    print 'Migrating {} logs...'.format(total)
    while count < total:
        modm_nodelogs = None
        modm_nodelogs = MODMNodeLog.find().sort('-date')[count:count + page_size]
        with transaction.atomic():
            print 'Migrating {} through {} which is {}'.format(
                count, count + page_size, len(modm_nodelogs))
            for modm_nodelog in modm_nodelogs:
                # don't recreate the log if it exists
                if NodeLog.objects.filter(guid=modm_nodelog._id).exists():
                    pass
                else:
                    if modm_nodelog.user is not None:
                        # try to get the pk out of the lookup table
                        user_pk = modm_to_django.get(modm_nodelog.user._id, None)
                        # it wasn't there
                        if user_pk is None:
                            # create a new user
                            print 'Creating User {}'.format(
                                modm_nodelog.user._id)
                            user = get_or_create_user(modm_nodelog.user)
                            user_pk = user.pk
                            # put the user in the lookup table for next time
                            modm_to_django[modm_nodelog.user._id] = user_pk
                    else:
                        # log doesn't have user
                        user_pk = None
                    # get the node (either a MODMNode instance or a node guid)
                    node_id = modm_nodelog.params.get(
                        'node', modm_nodelog.params.get('project'))
                    node_pk = None
                    if node_id is not None:
                        if isinstance(node_id, basestring):
                            # it's a guid, look it up in the table
                            node_pk = modm_to_django.get(node_id, None)
                        elif isinstance(node_id, MODMNode):
                            # it's an instance, look it up in the table
                            node_pk = modm_to_django.get(node_id._id, None)
                        if node_pk is None:
                            print 'Creating Node {}'.format(node_id)
                            # it wasn't in the table
                            if isinstance(node_id, basestring):
                                # it's a guid, get an instance and create a PG version
                                modm_node = MODMNode.load(node_id)
                                django_node = get_or_create_node(modm_node)
                                if django_node is None:
                                    print 'Node {} does not exist.'.format(
                                        node_id)
                                    continue
                                node_pk = get_or_create_node(modm_node).pk
                                # put it in the table for later
                                modm_to_django[modm_node._id] = node_pk
                            elif isinstance(node_id, MODMNode):
                                # it's an instance, create a PG version
                                node_pk = get_or_create_node(node_id).pk
                                # put it in the table for later
                                modm_to_django[node_id._id] = node_pk
                        if node_pk is not None:
                            # Map each formerly-connected node to its Django pk.
                            was_connected_to = []
                            for wct in modm_nodelog.was_connected_to:
                                wct_pk = modm_to_django.get(wct._id, None)
                                if wct_pk is None:
                                    wct_pk = get_or_create_node(wct).pk
                                    modm_to_django[wct._id] = wct_pk
                                was_connected_to.append(wct_pk)
                            if modm_nodelog.date is None:
                                nodelog_date = None
                            else:
                                # treat the naive MODM date as UTC
                                nodelog_date = pytz.utc.localize(modm_nodelog.date)
                            if modm_nodelog._id not in django_nodelogs_ids:
                                django_nodelogs.append(
                                    NodeLog(guid=modm_nodelog._id,
                                            date=nodelog_date,
                                            action=modm_nodelog.action,
                                            params=modm_nodelog.params,
                                            should_hide=modm_nodelog.should_hide,
                                            user_id=user_pk,
                                            foreign_user=modm_nodelog.foreign_user or '',
                                            node_id=node_pk))
                                django_nodelogs_was_connected_to[
                                    modm_nodelog._id] = was_connected_to
                                django_nodelogs_ids.append(modm_nodelog._id)
                            else:
                                print 'NodeLog with id {} and data {} was already in the bulk_create'.format(
                                    modm_nodelog._id, modm_nodelog.to_storage())
                        else:
                            print 'Node {} is None on NodeLog {}...'.format(
                                node_id, modm_nodelog._id)
                count += 1
                if count % (page_size / 50) == 0:
                    print 'Through {}'.format(count)
                if count % page_size == 0:
                    # Page boundary: flush the batch, then wire up the m2m rows.
                    print 'Starting to migrate {} through {} which should be {}'.format(
                        count - page_size, count, len(django_nodelogs))
                    if len(django_nodelogs) > 0:
                        NodeLog.objects.bulk_create(django_nodelogs)
                    print 'Finished migrating {} through {} which should be {}'.format(
                        count - page_size, count, len(django_nodelogs))
                    print 'Adding m2m values'
                    for django_nodelog in django_nodelogs:
                        nl = NodeLog.objects.get(guid=django_nodelog.guid)
                        nl.was_connected_to.add(
                            *django_nodelogs_was_connected_to[
                                django_nodelog.guid])
                    print 'Finished adding m2m values'
                    django_nodelogs = []
                    django_nodelogs_was_connected_to = {}
                    garbage = gc.collect()
                    print 'Collected {} garbages!'.format(garbage)
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print 'Finished migration. MODM: {}, DJANGO: {}'.format(
        total, NodeLog.objects.all().count())
def get_targets():
    """All wiki-deletion logs."""
    wiki_deleted_q = Q('action', 'eq', NodeLog.WIKI_DELETED)
    return NodeLog.find(wiki_deleted_q)
def get_aggregate_logs(ids, user, count=100):
    """Return up to ``count`` logs for the given node ids, ordered by date.

    NOTE(review): ``user`` is accepted for interface compatibility but is
    not used in this implementation.
    """
    node_filter = Q('params.node', 'in', ids)
    matched = NodeLog.find(node_filter).sort('date').limit(int(count))
    return list(matched)
def count_user_logs(user, query=None):
    """Number of NodeLogs by ``user``; ``query`` adds extra filtering."""
    if query:
        query &= Q('user', 'eq', user._id)
    else:
        query = Q('user', 'eq', user._id)
    matching = NodeLog.find(query)
    return matching.count()
def get_registration_approved_logs():
    """registration_approved logs that lack a params['registration'] field."""
    approved = Q('action', 'eq', 'registration_approved')
    missing_registration = Q('params.registration', 'eq', None)
    return NodeLog.find(approved & missing_registration)
def main():
    """Bulk-copy all MODM NodeLogs into Django NodeLog rows.

    Pages through the source logs ``page_size`` at a time; each full page
    is bulk-created in a single transaction and the batch is then reset.
    """
    start = datetime.now()
    split = start  # last progress-report timestamp
    total = MODMNodeLog.find().count()
    count = 0
    page_size = 10000  # logs per bulk_create transaction
    blank_users = 0  # logs with an unresolvable user
    blank_nodes = 0  # logs with an unresolvable node
    while count < total:
        garbage = gc.collect()
        print 'Collected {} whole garbages!'.format(garbage)
        print 'Migrating {} through {}'.format(count, count + page_size)
        django_nodelogs = deque()
        nodelog_guids = deque()  # guids queued in this batch (dedupe)
        for modm_nodelog in MODMNodeLog.find().sort('-date')[count:count + page_size]:
            if modm_nodelog._id in nodelog_guids:
                # Duplicate guid within the batch: skip to avoid double insert.
                print 'Nodelog with guid of {} and data of {} exists in batch'.format(
                    modm_nodelog._id, modm_nodelog.to_storage())
                continue
            else:
                nodelog_guids.append(modm_nodelog._id)
            try:
                user_pk = modm_to_django[modm_nodelog.user._id]
            except (KeyError, AttributeError) as ex:
                # no user on the log, or user absent from the lookup table
                blank_users += 1
                user_pk = None
            try:
                node_pk = modm_to_django[getattr(modm_nodelog, 'node', None)._id]
            except (KeyError, AttributeError) as ex:
                # no node on the log, or node absent from the lookup table
                blank_nodes += 1
                print 'Found blank node on {}'.format(modm_nodelog._id)
                node_pk = None
            if modm_nodelog.date is None:
                nodelog_date = None
            else:
                # treat the naive MODM date as UTC
                nodelog_date = pytz.utc.localize(modm_nodelog.date)
            django_nodelogs.append(
                NodeLog(guid=modm_nodelog._id,
                        date=nodelog_date,
                        action=modm_nodelog.action,
                        params=modm_nodelog.params,
                        should_hide=modm_nodelog.should_hide,
                        user_id=user_pk,
                        foreign_user=modm_nodelog.foreign_user or '',
                        node_id=node_pk))
            count += 1
            if count % 1000 == 0:
                print 'Through {} in {}'.format(count, (
                    datetime.now() - split).total_seconds())
                split = datetime.now()
            if count % page_size == 0:
                # Page boundary: flush the accumulated batch transactionally.
                print '{} blank users; {} blank nodes'.format(blank_users,
                                                              blank_nodes)
                print 'Starting to migrate {} through {} which is {}'.format(
                    count - page_size, count, len(django_nodelogs))
                splat = datetime.now()
                if len(django_nodelogs) > 0:
                    with transaction.atomic():
                        NodeLog.objects.bulk_create(django_nodelogs)
                print 'Finished migrating {} through {} in {} which is {}'.format(
                    count - page_size, count,
                    (datetime.now() - splat).total_seconds(),
                    len(django_nodelogs))
                django_nodelogs = deque()
                nodelog_guids = deque()
                garbage = gc.collect()
                print 'Collected {} whole garbages!'.format(garbage)
    print '\a\a\a\a\a'
    print 'Finished migration in {}. MODM: {}, DJANGO: {}'.format(
        (datetime.now() - start).total_seconds(), total, NodeLog.objects.count())
    print 'There were {} blank users and {} blank nodes'.format(blank_users, blank_nodes)
def get_targets():
    """Collect every WIKI_DELETED NodeLog."""
    deleted_action = Q('action', 'eq', NodeLog.WIKI_DELETED)
    targets = NodeLog.find(deleted_action)
    return targets
def get_targets():
    """EMBARGO_APPROVED logs whose params lack a user."""
    query = Q('action', 'eq', NodeLog.EMBARGO_APPROVED) & Q('params.user', 'eq', None)
    return NodeLog.find(query)