def migrate(delete, remove=False, index=None, app=None):
    """Reindexes relevant documents in ES

    :param bool delete: Delete documents that should not be indexed
    :param bool remove: Remove the old index after migrating
    :param str index: Index alias to version and migrate
    :param App app: Flask app for context
    """
    index = index or settings.ELASTIC_INDEX
    app = app or init_app('website.settings', set_backends=True, routes=True)

    script_utils.add_file_logger(logger, __file__)
    # NOTE: We do NOT use app.test_request_context() as a
    # context manager because we don't want the teardown_request
    # functions to be triggered
    ctx = app.test_request_context()
    ctx.push()

    new_index = set_up_index(index)

    if settings.ENABLE_INSTITUTIONS:
        migrate_institutions(new_index)
    migrate_nodes(new_index, delete=delete)
    migrate_files(new_index, delete=delete)
    migrate_users(new_index, delete=delete)
    migrate_collected_metadata(new_index, delete=delete)

    set_up_alias(index, new_index)

    if remove:
        remove_old_index(new_index)

    ctx.pop()

def main():
    if 'dry' not in sys.argv:
        scripts_utils.add_file_logger(logger, __file__)
    # Set up storage backends
    init_app(routes=False)
    logger.info('{n} invalid GUID objects found'.format(n=len(get_targets())))
    logger.info('Finished.')

def main(nworkers, worker_id, dry=True):
    if not dry:
        scripts_utils.add_file_logger(logger, __file__)
        logger.info('Not running in dry mode, changes WILL be made')
    else:
        logger.info('Running in dry mode, changes will NOT be made')

    to_migrate = model.OsfStorageNodeSettings.find(Q('_migrated_from_old_models', 'ne', True))
    if to_migrate.count() == 0:
        logger.info('No nodes to migrate; exiting...')
        return

    failed = 0
    logger.info('Found {} nodes to migrate'.format(to_migrate.count()))
    for node_settings in to_migrate:
        if hash(node_settings._id) % nworkers != worker_id:
            continue
        try:
            with TokuTransaction():
                migrate_node_settings(node_settings, dry=dry)
                migrate_children(node_settings, dry=dry)
                if not dry:
                    node_settings.reload()
                    node_settings._migrated_from_old_models = True
                    node_settings.save()
        except Exception as error:
            logger.error('Could not migrate file tree from {}'.format(node_settings.owner._id))
            logger.exception(error)
            failed += 1

    if failed > 0:
        logger.error('Failed to migrate {} nodes'.format(failed))

def handle(self, *args, **options):
    global DELETE_COLLISIONS
    global SKIP_COLLISIONS
    DELETE_COLLISIONS = options.get('delete_collisions')
    SKIP_COLLISIONS = options.get('skip_collisions')
    if DELETE_COLLISIONS and SKIP_COLLISIONS:
        raise Exception('Cannot specify both delete_collisions and skip_collisions')

    dry_run = options.get('dry_run')
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)

    addons = options.get('addons', [])
    if addons:
        PERMISSIBLE_ADDONS.update(set(addons))
    registration_ids = options.get('guids', [])

    if options.get('check', False):
        check_registrations(registration_ids)
    else:
        verify_registrations(registration_ids)
        if not dry_run:
            archive_registrations()
    log_results(dry_run)

def main():
    parser = argparse.ArgumentParser(
        description='Run w/o args for a dry run or with --no-i-mean-it for the real thing.'
    )
    parser.add_argument(
        '--no-i-mean-it',
        action='store_true',
        dest='for_reals',
        help='Run migration and commit changes to db',
    )
    parser.add_argument(
        '--reverse',
        action='store_true',
        dest='reverse',
        help='Run migration in reverse. (e.g. foo.gdoc => foo)',
    )
    parser.add_argument(
        '--audit',
        action='store_true',
        dest='audit',
        help='Collect stats on mime-types and extensions in the db',
    )
    args = parser.parse_args()
    if args.for_reals:
        script_utils.add_file_logger(logger, __file__)
    init_app(set_backends=True, routes=False)
    if args.audit:
        audit()
    else:
        with TokuTransaction():
            migrate(args.reverse)
            if not args.for_reals:
                raise RuntimeError('Dry Run -- Transaction rolled back')

def main():
    dry_run = '--dry' in sys.argv
    if not dry_run:
        # If we're not running in dry mode log everything to a file
        script_utils.add_file_logger(logger, __file__)

    count = 0
    preprints = PreprintService.objects.filter(
        is_published=True,
        date_published__gte=START_DATETIME,
        date_published__lte=END_DATETIME
    ).order_by('date_published').distinct().select_related('node', 'node__creator')

    for preprint in preprints:
        auth = Auth(preprint.node.creator)
        for author in preprint.node.contributors.filter(is_active=False):
            assert not author.is_registered
            logger.info('Sending email to unregistered User {} on PreprintService {}'.format(author._id, preprint._id))
            if not dry_run:
                project_signals.contributor_added.send(
                    preprint.node,
                    contributor=author,
                    auth=auth,
                    email_template='preprint'
                )
            count += 1
    logger.info('Sent an email to {} unregistered users'.format(count))

def main():
    init_app(routes=False)  # Sets the storage backends on all models
    dry = '--dry' in sys.argv
    if not dry:
        script_utils.add_file_logger(logger, __file__)
    with TokuTransaction():
        log_duplicate_acount(dry)

def migrate(delete, index=None, app=None):
    index = index or settings.ELASTIC_INDEX
    app = app or init_app('website.settings', set_backends=True, routes=True)

    script_utils.add_file_logger(logger, __file__)
    # NOTE: We do NOT use app.test_request_context() as a
    # context manager because we don't want the teardown_request
    # functions to be triggered
    ctx = app.test_request_context()
    ctx.push()

    new_index = set_up_index(index)
    start_time = timezone.now()

    if settings.ENABLE_INSTITUTIONS:
        migrate_institutions(new_index)
    migrate_nodes(new_index)
    migrate_users(new_index)

    set_up_alias(index, new_index)

    # Migrate nodes modified since the start of the migration
    migrate_nodes(new_index, query=Q('date_modified', 'gte', start_time))

    if delete:
        delete_old(new_index)

    ctx.pop()

def main():
    script_utils.add_file_logger(logger, __file__)
    app = init_app(attach_request_handlers=False)
    celery_app.main = 'scripts.send_digest'
    grouped_digests = group_digest_notifications_by_user()
    with app.test_request_context():
        send_digest(grouped_digests)

def main():
    dry = '--dry' in sys.argv
    if not dry:
        # If we're not running in dry mode log everything to a file
        script_utils.add_file_logger(logger, __file__)
    with transaction.atomic():
        start_time = datetime(2017, 12, 20, 8, 25, 25, tzinfo=pytz.UTC)
        end_time = datetime(2017, 12, 20, 18, 5, 1, tzinfo=pytz.UTC)
        users = OSFUser.objects.filter(
            is_registered=True,
            date_disabled__isnull=True,
            date_registered__range=[start_time, end_time]
        )
        count = 0
        for user in users:
            if settings.MAILCHIMP_GENERAL_LIST not in user.mailchimp_mailing_lists:
                if not dry:
                    subscribe_mailchimp(settings.MAILCHIMP_GENERAL_LIST, user._id)
                logger.info('User {} has been subscribed to OSF general mailing list'.format(user._id))
                count += 1
        logger.info('{} users have been subscribed to OSF general mailing list'.format(count))
        if dry:
            raise Exception('Abort Transaction - Dry Run')
    print('Done')

def main():
    es = connections.get_connection()
    dry = '--dry' in sys.argv
    if not dry:
        utils.add_file_logger(logger, __file__)

    preprints = Preprint.objects.filter(primary_file__isnull=False).select_related('primary_file', 'provider')
    total_preprints = preprints.count()
    logger.info('Collecting data on {} preprints...'.format(total_preprints))

    batch_to_update = []
    for i, preprint in enumerate(preprints, 1):
        preprint_id = preprint._id
        provider_id = preprint.provider._id
        file_id = preprint.primary_file._id
        page_counters = (
            PageCounter.objects
            .filter(
                _id__startswith='download:{preprint_id}:{file_id}:'.format(
                    preprint_id=preprint_id,
                    file_id=file_id
                )
            ).values_list('_id', 'date')
        )
        for page_counter in page_counters:
            page_counter__id, date = page_counter
            version_num = page_counter__id.split(':')[-1]
            for date, totals in date.items():
                timestamp = datetime.datetime.strptime(date, '%Y/%m/%d').replace(tzinfo=pytz.utc)
                batch_to_update.append({
                    '_index': 'osf_preprintdownload_{}'.format(timestamp.strftime(settings.ELASTICSEARCH_METRICS_DATE_FORMAT)),
                    '_source': {
                        'count': totals['total'],
                        'path': '/{}'.format(file_id),
                        'preprint_id': preprint_id,
                        'provider_id': provider_id,
                        'timestamp': timestamp,
                        'user_id': None,  # Pagecounter never tracked this
                        'version': int(version_num) + 1
                    },
                    '_type': 'doc'
                })

                if len(batch_to_update) >= MAX_BATCH_SIZE:
                    logger.info('Bulk-indexing data from {} PageCounter records'.format(len(batch_to_update)))
                    if not dry:
                        bulk(es, batch_to_update, max_retries=3, chunk_size=CHUNK_SIZE, request_timeout=REQUEST_TIMEOUT)
                    batch_to_update = []
                    # Allow elasticsearch to catch up
                    print('{}/{} preprints completed ({:.2f}%)'.format(i, total_preprints, i / total_preprints * 100))
                    sleep(THROTTLE_PERIOD)

    # Index final batch
    if len(batch_to_update):
        logger.info('Bulk-indexing data from {} PageCounter records'.format(len(batch_to_update)))
        if not dry:
            bulk(es, batch_to_update, max_retries=3, chunk_size=CHUNK_SIZE, request_timeout=REQUEST_TIMEOUT)
        logger.info('This will migrate {} Pagecounter entries to Elasticsearch'.format(len(batch_to_update)))

def main():
    dry_run = '--dry' in sys.argv
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)
    init_app(set_backends=True, routes=False)
    with transaction.atomic():
        migrate(dry_run)

def run_main(dry_run=True):
    if not dry_run:
        scripts_utils.add_file_logger(logger, __file__)
    init_app(routes=False)
    with TokuTransaction():
        main()
        if dry_run:
            raise RuntimeError("Dry run, rolling back transaction")

def main(dry=True):
    init_app(set_backends=True, routes=False)
    if not dry:
        scripts_utils.add_file_logger(logger, __file__)
    prereg = MetaSchema.find_one(
        Q('name', 'eq', "Prereg Challenge"))
    migrate_drafts_q5_metadata(prereg)
    migrate_registrations_q5_metadata(prereg)

def handle(self, *args, **options):
    dry_run = options.get('dry_run', False)
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)
    with transaction.atomic():
        update(dry_run)
        if dry_run:
            raise RuntimeError('Dry run -- transaction rolled back')

def main():
    dry = '--dry' in sys.argv
    script_utils.add_file_logger(logger, __file__)
    init_app(set_backends=True, routes=False)
    with TokuTransaction():
        migrate()
        if dry:
            raise RuntimeError('Dry run -- Transaction rolled back')

def main():
    dry_run = '--dry' in sys.argv
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)
    with transaction.atomic():
        update()
        if dry_run:
            raise RuntimeError('Dry mode -- rolling back transaction')

def main():
    init_app(routes=False)  # Sets the storage backends on all models
    dry = '--dry' in sys.argv
    if not dry:
        script_utils.add_file_logger(logger, __file__)
    with TokuTransaction():
        do_migration(get_targets(), dry)
        if dry:
            raise RuntimeError('Dry run, transaction rolled back')

def main(): dry_run = "--dry" in sys.argv if not dry_run: script_utils.add_file_logger(logger, __file__) init_app(set_backends=True, routes=False) with TokuTransaction(): migrate(dry_run=dry_run) if dry_run: raise RuntimeError("Dry run, transaction rolled back.")
def run_main(dry_run=True):
    init_app(routes=False)
    if not dry_run:
        # If we're not running in dry mode log everything to a file
        script_utils.add_file_logger(logger, __file__)
    # Finally run the migration
    with transaction.atomic():
        main(dry_run=dry_run)

def main():
    dry_run = '--dry' in sys.argv
    if dry_run:
        logger.warn('DRY RUN mode')
    else:
        utils.add_file_logger(logger, __file__)
    init_app(routes=False)
    with TokuTransaction():
        migrate(dry_run)

def log_errors(self, obj, obj_id, error):
    if not self.errors:
        script_utils.add_file_logger(logger, __file__)
    self.errors += 1
    logger.info('Error on {}, {}:'.format(obj, obj_id))
    logger.exception(error)
    if self.errors == 1000:
        sentry.log_message('ERROR: generate_sitemap stopped execution after reaching 1000 errors. See logs for details.')
        raise Exception('Too many errors generating sitemap.')

def main(dry=True):
    init_app(set_backends=True, routes=False)
    if not dry:
        scripts_utils.add_file_logger(logger, __file__)
    veer = MetaSchema.find_one(
        Q('name', 'eq', "Pre-Registration in Social Psychology (van 't Veer & Giner-Sorolla, 2016): Pre-Registration"))
    migrate_drafts_metadata_key(veer)
    migrate_registrations_metadata_key(veer)

def main():
    dry_run = False
    if '--dry' in sys.argv:
        dry_run = True
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)
    init_app(set_backends=True, routes=False)
    with TokuTransaction():
        migrate(dry_run=dry_run)

def main():
    init_app(set_backends=True, routes=False)
    dry_run = '--dry' in sys.argv
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)
    with transaction.atomic():
        update_taxonomies('bepress_taxonomy.json')
        if dry_run:
            raise RuntimeError('Dry run, transaction rolled back')

def handle(self, *args, **options):
    dry_run = options.get('dry_run', False)
    state = options.get('state', None)
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)
    with transaction.atomic():
        add_reviews_notification_setting(notification_type=options['notification'], state=state)
        if dry_run:
            raise RuntimeError('Dry run, transaction rolled back.')

def main():
    init_app(routes=False)
    dry_run = '--dry' in sys.argv
    if dry_run:
        logger.warn('Running a dry run')
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)
    with TokuTransaction():
        migrate(dry=dry_run)

def main():
    init_app(routes=False)
    guid = sys.argv[1]
    dry = '--dry' in sys.argv
    if not dry:
        script_utils.add_file_logger(logger, __file__)
    with TokuTransaction():
        restore_file(guid)
        if dry:
            raise RuntimeError('Dry run - rolling back transaction')

def main():
    init_app(routes=False)
    dry = '--dry' in sys.argv
    if not dry:
        # If we're not running in dry mode log everything to a file
        script_utils.add_file_logger(logger, __file__)
    with TokuTransaction():
        migrate()
        if dry:
            raise Exception('Abort Transaction - Dry Run')

def run_main(days=None, dry_run=True):
    init_app(set_backends=True, routes=False)
    try:
        days = int(days)
    except (ValueError, TypeError):
        days = 60 - 7  # refresh tokens that expire this week
    delta = relativedelta(days=days)
    if not dry_run:
        scripts_utils.add_file_logger(logger, __file__)
    main(delta, dry_run=dry_run)

            .format(user._id, user.fullname, node._id, ' (PUBLIC)' if node.is_public else ''))
    else:
        logger.info(
            'User {} could not be given a record, because their referrer was anonymous'
            .format(user._id))


def main(dry_run=True):
    count = 0
    users = OSFUser.objects.filter(
        date_disabled__isnull=True,
        is_registered=False,
        nodes__is_deleted=False,
        nodes__type='osf.node').include(None).distinct()
    logger.info('Checking {} unregistered users'.format(users.count()))
    for user in users:
        for node in user.nodes.exclude(type='osf.quickfilesnode').exclude(is_deleted=True):
            if user.unclaimed_records.get(node._id) is None:
                count += 1
                add_missing_unclaimed_record(user, node, dry_run)
    logger.info('Added {} unclaimed records'.format(count))


if __name__ == '__main__':
    dry_run = '--dry' in sys.argv
    if not dry_run:
        utils.add_file_logger(logger, __file__)
    main(dry_run=dry_run)

def run_main(dry_run=True):
    init_app(routes=False)
    if not dry_run:
        scripts_utils.add_file_logger(logger, __file__)
    main(dry_run=dry_run)

        version.save()
    except KeyExistsException:
        version = models.FileVersion.load(version._id)
    return version


def main(dry=True):
    init_app(set_backends=True, routes=False)  # Sets the storage backends on all models
    with TokuTransaction():
        do_migration()
        if dry:
            raise Exception('Abort Transaction - Dry Run')


if __name__ == '__main__':
    dry = 'dry' in sys.argv
    if not dry:
        script_utils.add_file_logger(logger, __file__)
    if 'debug' in sys.argv:
        logger.setLevel(logging.DEBUG)
    elif 'warning' in sys.argv:
        logger.setLevel(logging.WARNING)
    elif 'info' in sys.argv:
        logger.setLevel(logging.INFO)
    elif 'error' in sys.argv:
        logger.setLevel(logging.ERROR)
    main(dry=dry)

def main():
    dry_run = '--dry' in sys.argv
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)
    setup_django()
    migrate(dry=dry_run)

def run_main():
    add_file_logger(logger, __file__)
    SnapshotHarness().main(command_line=False)

def run_main():
    scripts_utils.add_file_logger(logger, __file__)
    main()

def main():
    init_app(routes=False)  # Sets the storage backends on all models
    dry = 'dry' in sys.argv
    if not dry:
        script_utils.add_file_logger(logger, __file__)
    do_migration(get_targets(), dry)

def main():
    dry = 'dry' in sys.argv
    if not dry:
        script_utils.add_file_logger(logger, __file__)
    remove_failed_registrations(dry_run=dry)

def run_main(send_mail=False, white_list=None):
    scripts_utils.add_file_logger(logger, __file__)
    if white_list:
        add_to_white_list(white_list)
    else:
        main(send_mail)

def main(dry=True):
    init_app(set_backends=True, routes=False)
    if not dry:
        scripts_utils.add_file_logger(logger, __file__)
    migrate_drafts(dry)

def handle(self, *args, **options):
    dry_run = options.get('dry_run', False)
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)
    update_share_preprint_modified_dates(dry_run)

def run_main(date=None, yesterday=False):
    add_file_logger(logger, __file__)
    SummaryHarness().main(date, yesterday, False)

def main():
    dry = 'dry' in sys.argv
    if not dry:
        script_utils.add_file_logger(logger, __file__)
    notify_desk_about_failed_registrations(dry_run=dry)

def run_main(job_id=None, dry_run=True):
    init_app(set_backends=True, routes=False)
    if not dry_run:
        scripts_utils.add_file_logger(logger, __file__)
    main(job_id=job_id)

def run_main(dry_run=True):
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)
    with transaction.atomic():
        main(dry_run=dry_run)

def handle(self, *args, **options):
    dry_run = options.get('dry_run', False)
    if not dry_run:
        script_utils.add_file_logger(logger, __file__)
    main(dry_run=dry_run)

    except Exception as error:
        logger.error(
            'Email of type {0} to be sent to {1} caused an ERROR'
            .format(mail.email_type, mail.to_addr))
        logger.exception(error)
        pass


def find_queued_mails_ready_to_be_sent():
    return mails.QueuedMail.find(
        Q('send_at', 'lt', datetime.utcnow()) &
        Q('sent_at', 'eq', None)
    )


def pop_and_verify_mails_for_each_user(user_queue):
    for user_emails in user_queue.values():
        mail = user_emails[0]
        mails_past_week = mails.QueuedMail.find(
            Q('user', 'eq', mail.user) &
            Q('sent_at', 'gt', datetime.utcnow() - settings.WAIT_BETWEEN_MAILS)
        )
        if not mails_past_week.count():
            yield mail


if __name__ == '__main__':
    dry_run = 'dry' in sys.argv
    init_app(routes=False)
    if not dry_run:
        add_file_logger(logger, __file__)
    main(dry_run=dry_run)

def run_main(dry_run=True):
    if not dry_run:
        add_file_logger(logger, __file__)
    main(dry_run=dry_run)

        pyrax.settings.set('identity_type', 'rackspace')
        pyrax.set_credentials(storage_settings.USERNAME, storage_settings.API_KEY, region=storage_settings.REGION)
        container_primary = pyrax.cloudfiles.get_container(storage_settings.PRIMARY_CONTAINER_NAME)
        container_parity = pyrax.cloudfiles.get_container(storage_settings.PARITY_CONTAINER_NAME)

        # Connect to AWS
        layer2 = Layer2(
            aws_access_key_id=storage_settings.AWS_ACCESS_KEY,
            aws_secret_access_key=storage_settings.AWS_SECRET_KEY,
        )
        vault = layer2.get_vault(storage_settings.GLACIER_VAULT)

        # Log to file
        if not dry_run:
            scripts_utils.add_file_logger(logger, __file__, suffix=worker_id)

        audit_temp_path = os.path.join(storage_settings.AUDIT_TEMP_PATH, str(worker_id))
        try:
            os.makedirs(audit_temp_path)
        except OSError:
            pass
        main(nworkers, worker_id, dry_run=dry_run)
    except Exception as err:
        logger.error('=== Unexpected Error ===')
        logger.exception(err)
        raise err

def main():
    dry = '--dry' in sys.argv
    if not dry:
        # If we're not running in dry mode log everything to a file
        script_utils.add_file_logger(logger, __file__)
    with transaction.atomic():
        qs = Registration.objects.filter(_contributors__is_registered=False, is_deleted=False)
        logger.info('Found {} registrations with unregistered contributors'.format(qs.count()))
        for registration in qs:
            registration_id = registration._id
            logger.info('Adding unclaimed_records for unregistered contributors on {}'.format(registration_id))
            registered_from_id = registration.registered_from._id
            # Update unclaimed records for all unregistered contributors in the registration
            for contributor in registration.contributors.filter(is_registered=False):
                contrib_id = contributor._id
                # Most unregistered users will have a record for the registration's node
                record = contributor.unclaimed_records.get(registered_from_id)
                if not record:
                    # Old unregistered contributors that have been removed from the original node will not have a record
                    logger.info('No record for node {} for user {}, inferring from other data'.format(registered_from_id, contrib_id))
                    # Get referrer id from logs
                    for log in registration.logs.filter(action='contributor_added').order_by('date'):
                        if contrib_id in log.params['contributors']:
                            referrer_id = str(OSFUser.objects.get(id=log.user_id)._id)
                            break
                    else:
                        # This should not get hit. Worst outcome is that resent claim emails will fail to send via admin for this record
                        logger.info('No record of {} in {}\'s logs.'.format(contrib_id, registration_id))
                        referrer_id = None
                    verification_key = generate_verification_key(verification_type='claim')
                    # name defaults back to name given in first unclaimed record
                    record = {
                        'name': contributor.given_name,
                        'referrer_id': referrer_id,
                        'token': verification_key['token'],
                        'expires': verification_key['expires'],
                        'email': None,
                    }
                logger.info('Writing new unclaimed_record entry for user {} for registration {}.'.format(contrib_id, registration_id))
                contributor.unclaimed_records[registration_id] = record
                contributor.save()
        if dry:
            raise Exception('Abort Transaction - Dry Run')
    print('Done')