def handle_noargs(self, **options):
    database = options.get('database')
    verbosity = int(options.get('verbosity'))
    interactive = options.get('interactive')
    if not database:
        raise base.CommandError("No MongoDB database specified.")
    db = connection.get_db(database)

    if interactive:
        confirm = raw_input("""You have requested a flush of the database.
This will IRREVERSIBLY DESTROY all data currently in the '%s' database (alias '%s').
Are you sure you want to do this?

Type 'yes' to continue, or 'no' to cancel: """ % (db.name, database))
    else:
        confirm = 'yes'

    if confirm == 'yes':
        try:
            for collection in db.collection_names():
                # Never drop MongoDB's internal index bookkeeping collection.
                if collection == 'system.indexes':
                    continue
                db.drop_collection(collection)
        except Exception as e:
            raise base.CommandError("""Database '%s' couldn't be flushed.
The full error: %s""" % (database, e))
        if verbosity > 1:
            self.stdout.write("Database '%s' flushed.\n" % database)
def load_scraper(self, type):
    try:
        module, cls = settings.TIMETABLE_SCRAPERS.get(type, type).rsplit('.', 1)
        return getattr(importlib.import_module(module), cls)
    except ImportError as e:
        raise management.CommandError('Couldn\'t import %s: %s' % (module, e))
    except AttributeError:
        raise management.CommandError('Scraper %s not found in %s' % (cls, module))
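# A minimal sketch of the settings hook load_scraper() assumes: TIMETABLE_SCRAPERS
# maps a short type name to a dotted "module.Class" path. The entry below is
# hypothetical; an unknown key falls through .get(type, type), so passing a full
# dotted path directly also works.
TIMETABLE_SCRAPERS = {
    'fri': 'timetable.scrapers.fri.FriScraper',  # hypothetical scraper class
}

# Usage: load_scraper('fri') imports timetable.scrapers.fri and returns FriScraper.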
def handle(self, *args, **options):
    self.stdout.write('Fetching {}...'.format(self.DATA_URL))
    response = requests.get(self.DATA_URL)
    if not response.ok:
        raise management_base.CommandError(
            'Cannot fetch {}. Status: {} {}.'.format(
                self.DATA_URL, response.status_code, response))

    self.stdout.write('Parsing JSON...')
    data = json.loads(response.text)

    self.stdout.write('Processing data...')
    update = PoldnevUpdate.from_poldnev_data(data)
    if update.is_empty:
        self.stdout.write('poldnev module is up to date.')
        return

    self.describe_update(update)
    self.stdout.write('')
    while True:
        answer = input('Save changes to the database (y/n): ')
        if answer == 'y':
            update.apply()
            self.stdout.write(self.style.SUCCESS(
                'poldnev module is successfully updated.'))
            break
        elif answer == 'n':
            self.stdout.write('No changes were applied.')
            break
def handle_noargs(self, **unused_options):
    """Perform an OAuth 2.0 oob flow.

    After the flow completes, instructions are provided to manually store the
    OAuth2 refresh_token in the project settings file.
    """
    flow = rdbms_googleapi.GetFlow()
    self.stdout.write('\nGo to the following link in your browser:\n%s\n\n' %
                      flow.step1_get_authorize_url('oob'))
    accepted = 'n'
    while accepted.lower() == 'n':
        accepted = raw_input('Have you authorized me? (y/n) ')
    code = raw_input('What is the verification code? ').strip()
    try:
        credential = flow.step2_exchange(code)
    except client.FlowExchangeError:
        raise base.CommandError('The authentication has failed.')
    self.stdout.write(
        '\nAdd your OAuth refresh token (%s) as an "OAUTH2_SECRET" parameter to'
        ' your database OPTIONS. For example:\n' % credential.refresh_token)
    self.stdout.write("""
    DATABASES = {
        'default': {
            'ENGINE': 'google.storage.speckle.python.django.backend',
            'INSTANCE': 'examplecom:instance',
            'NAME': 'dbname',
            'OPTIONS': {
                'OAUTH2_SECRET': '%s',
            }
        }
    }\n""" % credential.refresh_token)
def handle(self, *args, **options):
    logger.info('manage.py waitenv...')
    services = options.pop('services')
    if services:
        for srv in services:
            try:
                host, port = srv.split(':', 1)
                wait_for_connection(host, int(port))
            except IOError as exc:
                raise management.CommandError(exc)
        return
    for srv in settings.WAIT_FOR:
        try:
            wait_for_connection(srv['host'], srv['port'])
        except IOError as exc:
            raise management.CommandError(exc)
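# wait_for_connection() is used above but not defined here; a minimal sketch,
# assuming it should block until a TCP connect to host:port succeeds and raise
# IOError once a deadline passes (the timeout/delay defaults are assumptions).
import socket
import time

def wait_for_connection(host, port, timeout=60, delay=1):
    deadline = time.time() + timeout
    while True:
        try:
            # Succeeds as soon as something is listening on host:port.
            socket.create_connection((host, port), timeout=delay).close()
            return
        except OSError:
            if time.time() >= deadline:
                raise IOError('timed out waiting for %s:%s' % (host, port))
            time.sleep(delay)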
def handle_label(self, label, **options):
    self.stdout.write('Rebuilding TransitionLog states for %s\n' % label)
    app_label, model_label = label.rsplit('.', 1)
    model = models.get_model(app_label, model_label)
    if not hasattr(model, '_workflows'):
        raise base.CommandError("Model %s isn't attached to a workflow." % label)
    for field_name, state_field in model._workflows.items():
        self._handle_field(label, model, field_name, state_field.workflow,
                           **options)
def load_semester(self, options):
    year = options['year']
    type = options['type']
    if not year or not type:
        raise management.CommandError('Semester year and/or type is missing.')
    try:
        return Semester.objects.get(year=year, type=type)
    except Semester.DoesNotExist:
        if not options['create']:
            raise
        return Semester.objects.create(year=year, type=type)
def handle(self, *file_or_urls, **options):
    if file_or_urls:
        parser_module = options.get('parser')
        parser = None
        if parser_module:
            if parser_module.endswith('.py'):
                parser_module = parser_module[:-3]
            parser = __import__('kochief.cataloging.parsers.' + parser_module,
                                globals(), locals(), [parser_module])
        for file_or_url in file_or_urls:
            data_handle = urllib.urlopen(file_or_url)
            # committer is "machine" from fixture
            #committer = User.objects.get(id=2)
            if not parser:
                # guess parser based on file extension
                if file_or_url.endswith('.mrc'):
                    import kochief.cataloging.parsers.marc as parser
                else:
                    raise mb.CommandError("Please specify a parser.")
            #out_handle = open(RDF_FILE, 'w')
            #count = parser.write_graph(data_handle, out_handle, format='xml')
            count = 0
            if DISCOVERY_INSTALLED:
                csv_handle = open(CSV_FILE, 'w')
                writer = csv.DictWriter(csv_handle, parser.FIELDNAMES)
                fieldname_dict = {}
                for fieldname in parser.FIELDNAMES:
                    fieldname_dict[fieldname] = fieldname
                writer.writerow(fieldname_dict)
            for record in parser.generate_records(data_handle):
                count += 1
                statements = record.get_statements()
                resource = models.Resource(record.id, statements)
                resource.save()
                if DISCOVERY_INSTALLED:
                    row = record.get_row()
                    writer.writerow(row)
                # Progress indicator: a dot per record, and the running total
                # on every 1000th record.
                if count % 1000:
                    sys.stderr.write(".")
                else:
                    sys.stderr.write(str(count))
            data_handle.close()
            print
            print "%s records saved" % count
            if DISCOVERY_INSTALLED:
                csv_handle.close()
                import kochief.discovery.management.commands.index as i
                i.load_solr(CSV_FILE)
                os.remove(CSV_FILE)
def configure_plugin(self, component_obj, plugin_id, name, description, config):
    """Configure a plugin for a component."""
    try:
        plugin_obj = models.Plugin.objects.get(id=plugin_id)
    except models.Plugin.DoesNotExist:
        raise base.CommandError('Plugin "%s" does not exist' % plugin_id)
    defaults = {'description': description, 'config': config}
    pinstance_obj, created = component_obj.plugins.update_or_create(
        plugin=plugin_obj, name=name, defaults=defaults)
    pinstance_obj.save()
    action = 'Created' if created else 'Updated'
    self.stdout.write(self.style.SUCCESS(
        '%s %s:%s config' % (action, component_obj.name, plugin_obj.name)))
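# A hypothetical call site for configure_plugin(), e.g. from the command's
# handle(); the component, plugin id, and config values below are invented:
#
#     self.configure_plugin(component_obj=component,
#                           plugin_id='nginx',
#                           name='frontend-proxy',
#                           description='Terminates TLS',
#                           config={'listen': 443})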
def handle(self, *file_or_urls, **options):
    new = options.get('new')
    if new:
        # Clear the whole Solr index before loading.
        data = '<delete><query>*:*</query></delete>'
        r = urllib2.Request(conf.settings.SOLR_URL + 'update?commit=true')
        r.add_header('Content-Type', 'text/xml')
        r.add_data(data)
        f = urllib2.urlopen(r)
        print "Solr response to deletion request:"
        print f.read()
    if file_or_urls:
        parser = options.get('parser')
        module = None
        if parser:
            if parser.endswith('.py'):
                parser = parser[:-3]
            module = __import__('kochief.discovery.parsers.' + parser,
                                globals(), locals(), [parser])
        for file_or_url in file_or_urls:
            if not module:
                # guess parser based on file extension
                if file_or_url.endswith('.mrc'):
                    import kochief.discovery.parsers.marc as module
            if not module:
                raise mb.CommandError("Please specify a parser.")
            print "Converting %s to CSV ..." % file_or_url
            t1 = time.time()
            data_handle = urllib.urlopen(file_or_url)
            # Open outside the try block so csv_handle is always bound in finally.
            csv_handle = open(CSV_FILE, 'w')
            try:
                record_count = module.write_csv(
                    data_handle, csv_handle,
                    collections=options.get('collections'))
            finally:
                csv_handle.close()
            t2 = time.time()
            load_solr(CSV_FILE)
            t3 = time.time()
            os.remove(CSV_FILE)
            p_time = (t2 - t1) / 60
            l_time = (t3 - t2) / 60
            t_time = p_time + l_time
            rate = record_count / (t3 - t1)
            print """Processing took %0.3f minutes.
Loading took %0.3f minutes.
That's %0.3f minutes total for %d records,
at a rate of %0.3f records per second.
""" % (p_time, l_time, t_time, record_count, rate)
def validate(self, app=None, display_num_errors=False):
    """
    Validates the given app, raising CommandError for any errors.

    If app is None, then this will validate all installed apps.
    """
    from baph.core.management.validation import get_validation_errors
    s = StringIO()
    num_errors = get_validation_errors(s, app)
    if num_errors:
        s.seek(0)
        error_text = s.read()
        raise base.CommandError(
            "One or more models did not validate:\n%s" % error_text)
    if display_num_errors:
        self.stdout.write("%s error%s found" %
                          (num_errors, num_errors != 1 and 's' or ''))
def _handle_field(self, label, model, field_name, workflow, **options):
    if not hasattr(workflow, 'log_model') or not workflow.log_model:
        raise base.CommandError("Field %s of %s does not log to a model." %
                                (field_name, label))
    log_model = workflow._get_log_model_class()
    model_type = ctype_models.ContentType.objects.get_for_model(model)
    verbosity = int(options.get('verbosity', 1))
    if verbosity:
        self.stdout.write('%r.%s: ' % (model, field_name))
    for pk in model.objects.order_by('pk').values_list('pk', flat=True):
        previous_state = workflow.initial_state
        qs = (log_model.objects.filter(content_type=model_type, content_id=pk)
              .order_by('timestamp'))
        if verbosity >= 2:
            self.stdout.write('\n %d:' % pk)
        for log in qs:
            try:
                workflow.transitions[log.transition]
            except KeyError:
                self.stderr.write("Unknown transition %s in log %d for %s %d\n" %
                                  (log.transition, log.pk, label, pk))
                continue
            updated = False
            if not log.from_state:
                log.from_state = previous_state
                updated = True
            if not log.to_state:
                log.to_state = workflow.transitions[log.transition].target
                updated = True
            previous_state = log.to_state
            if updated:
                log.save()
        if verbosity:
            self.stdout.write('.')
    if verbosity:
        self.stdout.write('\n')
def handle(self, *args, **options):
    if not settings.LOCAL_GROUPS_AIRTABLE:
        raise base.CommandError("Airtable environment variables are unset")
    local_group_types = {
        "City": models.LocalGroupType.CITY,
        "Country": models.LocalGroupType.COUNTRY,
        "University": models.LocalGroupType.UNIVERSITY,
    }
    local_groups = []
    for table_name, is_active in [("Active", True), ("Inactive", False)]:
        for record in airtable.Airtable(
                table_name=table_name,
                **settings.LOCAL_GROUPS_AIRTABLE).get_all():
            airtable_fields = record["fields"]
            name = airtable_fields["Group Name"]
            city_or_town = airtable_fields.get("City or Town", "")
            country = airtable_fields.get("Country", "")
            local_group = models.LocalGroup(
                # This will raise an IntegrityError on collisions
                slug=autoslug_utils.slugify(name),
                name=name,
                is_active=is_active,
                local_group_type=local_group_types.get(
                    airtable_fields.get("University or City")),
                city_or_town=city_or_town,
                country=country,
                website=airtable_fields.get("Website", ""),
                facebook_group=airtable_fields.get("Facebook Group", ""),
                facebook_page=airtable_fields.get("Facebook Page", ""),
                email=airtable_fields.get(
                    "Official Group Email",
                    airtable_fields.get(
                        "LEAN email", airtable_fields.get("EAF email", "")),
                ),
                meetup_url=airtable_fields.get("Meetup.com", ""),
                airtable_record=record["id"],
            )
            if city_or_town and country:
                self.stdout.write(f"Geocoding: {city_or_town}, {country}")
                time.sleep(1)
                local_group.geocode()
            local_groups.append(local_group)
    models.LocalGroup.objects.bulk_create(local_groups)
def handle(self, *file_or_urls, **options):
    new = options.get('new')
    if new:
        # create/replace index
        pass
    if file_or_urls:
        parser = options.get('parser')
        module = None
        if parser:
            if parser.endswith('.py'):
                parser = parser[:-3]
            module = __import__('kochief.discovery.parsers.' + parser,
                                globals(), locals(), [parser])
        for file_or_url in file_or_urls:
            if not module:
                # guess parser based on file extension
                if file_or_url.endswith('.mrc'):
                    import kochief.discovery.parsers.marc as module
            if not module:
                raise mb.CommandError("Please specify a parser.")
            print "Converting %s to CSV ..." % file_or_url
            t1 = time.time()
            data_handle = urllib.urlopen(file_or_url)
            # Open outside the try block so csv_handle is always bound in finally.
            csv_handle = open(CSV_FILE, 'w')
            try:
                record_count = module.write_csv(data_handle, csv_handle)
            finally:
                csv_handle.close()
            t2 = time.time()
            load_solr(CSV_FILE)
            t3 = time.time()
            os.remove(CSV_FILE)
            p_time = (t2 - t1) / 60
            l_time = (t3 - t2) / 60
            t_time = p_time + l_time
            rate = record_count / (t3 - t1)
            print """Processing took %0.3f minutes.
Loading took %0.3f minutes.
That's %0.3f minutes total for %d records,
at a rate of %0.3f records per second.
""" % (p_time, l_time, t_time, record_count, rate)
def handle(self, *args, **options):
    verbose = int(options.get('verbosity'))
    until = options.get('until')
    if until:
        try:
            # TODO: Support also timezone in the datetime format
            until = datetime.datetime.strptime(until, '%Y-%m-%dT%H:%M:%S')
        except ValueError:
            raise base.CommandError(
                "Use time format 'yyyy-mm-ddThh:mm:ss' (i.e. '2007-03-04T21:08:12').")
    else:
        # Make sure it is None and not an empty string
        until = None
    if verbose > 1:
        self.stdout.write("Downsampling.\n")
    datastream.downsample_streams(until=until)
    if verbose > 1:
        self.stdout.write("Done.\n")
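# A hypothetical programmatic invocation; the command name 'downsample' is an
# assumption (any name this handler is registered under works):
#
#     from django.core import management
#     management.call_command('downsample', until='2007-03-04T21:08:12', verbosity=2)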
def handle(self, *args, **options) -> None:
    if not self.program:
        raise ValueError("Must supply program to run")
    command_extra_args: typing.List[str] = options.get("command_extra_args", [])
    if isinstance(options["source_paths"], typing.List):
        source_paths: typing.List[str] = options["source_paths"]
    else:
        source_paths_gen: typing.Generator[Path, None, None] = options["source_paths"]
        source_paths = [f"{source_path}" for source_path in source_paths_gen]
    try:
        self.run_subprocess(command_extra_args, source_paths)
    except subprocess.CalledProcessError as exc:
        raise command.CommandError(  # type: ignore
            f"Failed to run {self.program} ({exc.returncode})",
            returncode=exc.returncode)
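# run_subprocess() is referenced above but not shown; a minimal sketch, assuming
# it should exec self.program with the extra args followed by the source paths
# and let CalledProcessError propagate (which handle() converts above):
import subprocess
import typing

def run_subprocess(self, command_extra_args: typing.List[str],
                   source_paths: typing.List[str]) -> None:
    # check=True raises subprocess.CalledProcessError on a non-zero exit code.
    subprocess.run([self.program, *command_extra_args, *source_paths], check=True)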
def handle(self, *args, **options):
    try:
        url = options['url'] or django.conf.settings.CSV_URL
        csv_fd = io.BytesIO(utils.download_file(url))
    except utils.DownloadError:
        raise base.CommandError("Invalid url: %s" % url)

    if not options['preserve']:
        for o in models.ImageData.objects.all():
            o.delete()

    # Skip the CSV header row.
    csv_fd.next()
    for i, (title, description, image_url) in enumerate(csv.reader(csv_fd)):
        row_num = i + 1
        title, description, image_url = self._fix_row(
            title, description, image_url)
        if not title or not description:
            self.stderr.write(
                self._notice(row_num, 'skipped - empty title and/or description'))
            continue
        data_entry = models.ImageData(title=title, description=description)
        data_entry.save()
        if not image_url:
            self.stdout.write(self._notice(row_num, 'empty url'))
            continue
        try:
            data_entry.save(image_url)
        except utils.DownloadError as e:
            self.stdout.write(self._notice(row_num, e.message))
            continue
        else:
            self.stdout.write(
                self._success(row_num, 'downloaded url "%s"' % image_url))
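# utils.download_file()/DownloadError are not shown; a Python 2 style sketch
# consistent with the snippet above (which uses .next() and e.message). This is
# purely an assumption about their shape:
import urllib2

class DownloadError(Exception):
    pass

def download_file(url):
    try:
        return urllib2.urlopen(url).read()
    except (urllib2.URLError, ValueError) as e:
        # Normalize transport and bad-URL errors into one exception type.
        raise DownloadError(str(e))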
def handle(self, *args, **options):
    self.log = getLogger(__name__)
    self.path = Path(options["csv"])
    if not self.path.exists():
        raise base.CommandError(f"{self.path} does not exist")
    with CsvSlicer(self.path) as source:
        kwargs = {
            "desc": f"Importing {self.model._meta.verbose_name} data",
            "total": source.total_lines,
            "unit": "lines",
        }
        with tqdm(**kwargs) as progress_bar:
            for reader in source.readers:
                bulk = tuple(self.serialize(line) for line in reader)
                objs = (obj for obj in bulk if isinstance(obj, self.model))
                self.model.objects.bulk_create(objs)
                progress_bar.update(len(bulk))
    self.post_handle()
    get_city.cache_clear()
    get_party.cache_clear()
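# serialize() is assumed to turn one CSV line into an unsaved model instance, or
# a falsy value for rows to skip (the isinstance filter above keeps only real
# instances before bulk_create); a hypothetical sketch:
def serialize(self, line):
    try:
        return self.model(**line)
    except (TypeError, ValueError):
        self.log.warning("skipping malformed line: %r", line)
        return None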
def handle(self, *args, **options):
    for srv in settings.WAIT_FOR:
        try:
            wait_for_connection(srv['host'], srv['port'])
        except IOError as exc:
            raise management.CommandError(exc)
def handle(self, *args, **options):
    try:
        debug = bool(conf.settings.DEBUG)
    except KeyError:
        debug = False
    if not debug:
        # CommandError already terminates the command; no sys.exit() needed.
        raise base.CommandError(
            'Error! settings.DEBUG is not defined or false. '
            'This command only works with a development install')
    fake = faker.Faker()
    path = conf.settings.BASE_DIR + 'django_artisan/management/images/'
    try:
        path = conf.settings.MANAGEMENT_IMAGE_PATH
    except AttributeError:
        pass
    try:
        imagefiles = [
            f for f in os.listdir(path)
            if os.path.isfile(os.path.join(path, f))
        ]
    except Exception as e:
        raise base.CommandError("Error! can't get image names. {}".format(e))
    if not len(imagefiles):
        raise base.CommandError('Error! There are no images!')
    user_ids = []
    for i in range(options['num_of_users']):
        self.stdout.write(
            self.style.SUCCESS('Creating user number {}'.format(i + 1)))
        first_name = fake.unique.first_name()
        last_name = fake.unique.last_name()
        new_user = auth.get_user_model().objects.create(
            username=first_name + str(uuid.uuid4()),
            first_name=first_name,
            last_name=last_name,
            is_active=True)
        new_user.profile.display_name = first_name + '-' + last_name
        new_user.save()
        user_ids.append(new_user.id)
        for n in range(conf.settings.MAX_USER_IMAGES):
            # Pick any available image; the original hard-coded randrange(8),
            # which breaks with fewer than eight files.
            pic = imagefiles[randrange(len(imagefiles))]
            try:
                image = new_user.profile.forum_images.create(
                    file=images.ImageFile(file=open(path + pic, 'rb')),
                    text=str(path) + str(pic),
                    title=pic[:30],
                    active=True)
                img = Image.open(image.file.path)
                #img = ImageOps.fit(img, (1024,768))
                img = ImageOps.expand(img, border=10, fill='white')
                img.save(image.file.path)
                get_thumbnail(image.file, "1024x768", format="WEBP",
                              crop='center', quality=70)
                get_thumbnail(image.file, "500x700", format="WEBP",
                              crop='center', quality=70)
            except Exception as e:
                raise base.CommandError(
                    'Error! creating image for user {} failed! {}'.format(i, e))
    self.stdout.write(
        self.style.SUCCESS('Successfully created {} users.'.format(i + 1)))
    self.stdout.write(self.style.SUCCESS('User ids: {}'.format(user_ids)))
def handle(self, *args, **options):
    """
    Generates a sanitized dump of the nodewatcher database.
    """
    if len(args) == 0:
        raise management_base.CommandError('Missing dump archive argument!')

    # Transform a relative path into an absolute one
    dest_archive = args[0]
    if '/' not in dest_archive:
        dest_archive = os.path.join(os.getcwd(), dest_archive)

    # A hack to get dumpdata output
    json = string_io.StringIO()
    sys.stdout = json
    management.call_command("dumpdata", *args[1:], **options)
    sys.stdout = sys.__stdout__
    gc.collect()

    # Get JSON and sanitize the dump
    json.seek(0)

    def ensure_success(errcode):
        if errcode != 0:
            raise management_base.CommandError('Command failed to execute, aborting!')

    def object_transformator():
        """
        Object transformator generator.
        """
        # Read all objects one by one
        for holder in serializers.deserialize("json", json):
            object = holder.object
            name = "%s.%s" % (object.__module__, object.__class__.__name__)

            # Some objects need to be sanitized
            if name == 'frontend.nodes.models.Node':
                if not object.is_dead():
                    # We do not clean notes for dead nodes as they explain death background
                    object.notes = ''
            elif name == 'frontend.account.models.UserProfileAndSettings':
                object.phone_number = '+411234567'
                object.country = settings.DEFAULT_COUNTRY
                object.language = settings.LANGUAGES[0][0]
                object.default_project = project_default()
            elif name == 'django.contrib.auth.models.User':
                object.first_name = "Foo"
                object.last_name = "Bar"
                object.email = "*****@*****.**"
                object.password = '******'  # Password for all users is 123
            elif name == 'frontend.generator.models.Profile':
                object.root_pass = '******'
                if not object.wan_dhcp:
                    object.wan_ip = generate_random_ip()
                    object.wan_cidr = 24
                    net = ipcalc.Network(object.wan_ip, object.wan_cidr)
                    object.wan_gw = str(net.host_first())
            elif name == 'frontend.generator.models.StatsSolar':
                continue
            elif name == 'frontend.generator.models.WhitelistItem':
                continue
            elif name == 'django.contrib.sessions.models.Session':
                continue
            elif name == 'django.contrib.auth.models.Message':
                continue

            yield holder.object

    # Perform dump transformation
    tmp_dir = os.path.join("/tmp", ".__nodewatcher_dump_dir")
    ensure_success(subprocess.call(["rm", "-rf", tmp_dir]))
    os.mkdir(tmp_dir)
    out = open(os.path.join(tmp_dir, "data.json"), "w")
    serializers.serialize("json", object_transformator(), stream=out)
    out.close()
    json.close()

    if hasattr(settings, 'GRAPH_DIR'):
        # Copy topology
        ensure_success(subprocess.call([
            "mkdir",
            os.path.join(tmp_dir, "graphs"),
        ]))
        ensure_success(subprocess.call([
            "cp",
            os.path.join(settings.GRAPH_DIR, "network_topology.png"),
            os.path.join(settings.GRAPH_DIR, "network_topology.dot"),
            os.path.join(tmp_dir, "graphs"),
        ]))

    # Generate a tar.bz2 archive
    os.chdir(tmp_dir)
    ensure_success(subprocess.call(["tar cfj {0} *".format(dest_archive)], shell=True))
    ensure_success(subprocess.call(["rm", "-rf", tmp_dir]))
def error(self, message):
    if self.cmd._called_from_command_line:
        super(CommandParser, self).error(message)
    else:
        raise base.CommandError("Error: %s" % message)
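# A sketch of the class this error() override belongs to, mirroring Django's own
# CommandParser pattern; the cmd attribute wiring here is an assumption:
import argparse

class CommandParser(argparse.ArgumentParser):
    def __init__(self, cmd, **kwargs):
        # Remember the owning command so error() can tell whether it was invoked
        # from the command line or programmatically (e.g. via call_command).
        self.cmd = cmd
        super(CommandParser, self).__init__(**kwargs)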
def handle_noargs(self, **options):
    """
    Generates a dump of Django CMS database.
    """
    format = options.get('format')
    indent = options.get('indent')
    use_natural_keys = options.get('use_natural_keys')
    show_traceback = options.get('traceback')

    # Check that the serialization format exists; this is a shortcut to
    # avoid collating all the objects and _then_ failing.
    if format not in serializers.get_public_serializer_formats():
        raise base.CommandError("Unknown serialization format: %s" % format)
    try:
        serializers.get_serializer(format)
    except KeyError:
        raise base.CommandError("Unknown serialization format: %s" % format)

    pages = cms_models.Page.objects.filter(published=True)
    placeholders = cms_models.Placeholder.objects.filter(page__in=pages)
    blog_entries = blog_models.Entry.published.all()
    blog_placeholders = cms_models.Placeholder.objects.filter(
        entry__in=blog_entries)
    plugins = cms_models.CMSPlugin.objects.filter(
        Q(placeholder__in=placeholders) | Q(placeholder__in=blog_placeholders))

    def plugin_objects():
        for plugin in plugins:
            instance = plugin.get_plugin_instance()[0]
            if instance is None:
                raise base.CommandError(
                    "Invalid plugin instance: %s from placeholder %s" %
                    (plugin.pk, plugin.placeholder.pk))
            yield instance

    objects = itertools.chain(
        sites_models.Site.objects.all(),
        contenttypes_models.ContentType.objects.all(),
        placeholders,
        pages,
        cms_models.Title.objects.filter(page__in=pages),
        blog_placeholders,
        blog_entries,
        blog_models.EntryTitle.objects.filter(entry__in=blog_entries),
        plugins,
        plugin_objects(),
        filer_models.Folder.objects.all(),
        filer_models.File.objects.non_polymorphic().filter(is_public=True),
        filer_models.Image.objects.non_polymorphic().filter(is_public=True),
        filer_image_models.ThumbnailOption.objects.all(),
        snippet_models.Snippet.objects.all(),
    )
    try:
        return serializers.serialize(format, objects, indent=indent,
                                     use_natural_keys=use_natural_keys)
    except Exception as e:
        if show_traceback:
            raise
        raise base.CommandError("Unable to serialize database: %s" % e)
def handle(self, *args, **options):
    if "legacy" not in db.connections:
        raise base.CommandError(
            "LEGACY_DATABASE_URL environment variable is unset")
    with db.connections["legacy"].cursor() as cursor:
        # The adjacent string literals of the original query are folded into a
        # single triple-quoted statement; the SQL itself is unchanged.
        cursor.execute("""
            SELECT
                LOWER(users.mail),
                IF(users.login,
                   CONVERT_TZ(FROM_UNIXTIME(users.login),
                              @@SESSION.time_zone, '+00:00'),
                   NULL),
                CONVERT_TZ(FROM_UNIXTIME(users.created),
                           @@SESSION.time_zone, '+00:00'),
                TRIM(LEADING 'user/' FROM url_alias.alias),
                users.name,
                IFNULL(field_data_field_in_which_city_do_you_live_.field_in_which_city_do_you_live__value, ''),
                IFNULL(field_data_field_in_which_country_do_you_li.field_in_which_country_do_you_li_value, ''),
                IFNULL(cause_areas.global_poverty, FALSE),
                IFNULL(cause_areas.animal_welfare_and_rights, FALSE),
                IFNULL(cause_areas.long_term_future, FALSE),
                IFNULL(cause_areas.cause_prioritisation, FALSE),
                IFNULL(cause_areas.meta, FALSE),
                IFNULL(cause_areas.climate_change, FALSE),
                IFNULL(cause_areas.cause_areas_other, ''),
                IFNULL(field_data_field_skills.field_skills_value, ''),
                IFNULL(field_data_field_more_about_me.field_more_about_me_value, ''),
                users.uid
            FROM users
            LEFT JOIN url_alias ON CONCAT('user/', users.uid) = url_alias.source
            LEFT JOIN (
                SELECT uid, MIN(pid) AS pid
                FROM profile
                WHERE type = 'basic_information'
                GROUP BY uid
            ) AS basic_information USING (uid)
            LEFT JOIN field_data_field_in_which_city_do_you_live_
                ON basic_information.pid = field_data_field_in_which_city_do_you_live_.entity_id
                AND field_data_field_in_which_city_do_you_live_.bundle = 'basic_information'
            LEFT JOIN field_data_field_in_which_country_do_you_li
                ON basic_information.pid = field_data_field_in_which_country_do_you_li.entity_id
                AND field_data_field_in_which_country_do_you_li.bundle = 'basic_information'
            LEFT JOIN (
                SELECT uid, MIN(pid) AS pid
                FROM profile
                WHERE type = 'your_views_and_values'
                GROUP BY uid
            ) AS your_views_and_values USING (uid)
            LEFT JOIN (
                SELECT
                    entity_id,
                    MAX(coded_cause_area = 1) AS global_poverty,
                    MAX(coded_cause_area = 2) AS animal_welfare_and_rights,
                    MAX(coded_cause_area = 3) AS long_term_future,
                    MAX(coded_cause_area = 4) AS cause_prioritisation,
                    MAX(coded_cause_area = 5) AS meta,
                    MAX(coded_cause_area = 6) AS climate_change,
                    GROUP_CONCAT(
                        IF(coded_cause_area IS NULL,
                           field_interest_in_causes_and_com_value,
                           NULL)
                        ORDER BY delta
                        SEPARATOR '; '
                    ) AS cause_areas_other
                FROM (
                    SELECT
                        entity_id,
                        delta,
                        field_interest_in_causes_and_com_value,
                        CASE field_interest_in_causes_and_com_value
                            WHEN 'Earn-to-give' THEN 0
                            WHEN 'Entrepreneurship' THEN 0
                            WHEN 'Global poverty' THEN 1
                            WHEN 'Animal welfare' THEN 2
                            WHEN 'Existential risk / far future outcomes' THEN 3
                            WHEN 'Existential risk and far future causes' THEN 3
                            WHEN 'Machine intelligence risk' THEN 3
                            WHEN 'Molecular nanotechnology' THEN 3
                            WHEN 'Nuclear technology' THEN 3
                            WHEN 'Synthetic biology risk' THEN 3
                            WHEN 'Effective giving' THEN 4
                            WHEN 'Prioritization research' THEN 4
                            WHEN 'Movement-building' THEN 5
                            WHEN 'Rationality' THEN 5
                            WHEN 'Climate change' THEN 6
                            ELSE NULL
                        END AS coded_cause_area
                    FROM field_data_field_interest_in_causes_and_com
                ) AS coded_cause_areas
                GROUP BY entity_id
            ) AS cause_areas ON your_views_and_values.pid = cause_areas.entity_id
            LEFT JOIN (
                SELECT uid, MIN(pid) AS pid
                FROM profile
                WHERE type = 'your_career'
                GROUP BY uid
            ) AS your_career USING (uid)
            LEFT JOIN field_data_field_skills
                ON your_career.pid = field_data_field_skills.entity_id
            LEFT JOIN (
                SELECT uid, MIN(pid) AS pid
                FROM profile
                WHERE type = 'free_text'
                GROUP BY uid
            ) AS free_text USING (uid)
            LEFT JOIN field_data_field_more_about_me
                ON free_text.pid = field_data_field_more_about_me.entity_id
            WHERE
                users.uid
                AND users.status
                AND NOT EXISTS (
                    SELECT NULL
                    FROM redirect
                    WHERE redirect.source = url_alias.alias
                    AND redirect.redirect != CONCAT('user/', users.uid)
                );""")
        profile_rows = cursor.fetchall()
        cursor.execute("""
            SELECT users.uid, TRIM(LEADING 'user/' FROM redirect.source)
            FROM redirect
            INNER JOIN users ON redirect.redirect = CONCAT('user/', users.uid)
            WHERE
                redirect.source LIKE 'user/%'
                AND redirect.status
                AND users.uid
                AND users.status;""")
        redirect_rows = list(cursor.fetchall())

    @functools.lru_cache(maxsize=None)
    def geocode(city_or_town, country):
        if city_or_town and country:
            self.stdout.write(f"Geocoding: {city_or_town}, {country}")
            time.sleep(1)
            location = geocoders.Nominatim(timeout=10).geocode(
                f"{city_or_town}, {country}")
            if location:
                return {"lat": location.latitude, "lon": location.longitude}
        return {"lat": None, "lon": None}

    fields = [(
        email,
        {
            "last_login": (last_login and timezone.make_aware(
                last_login, timezone=timezone.utc)),
            "date_joined": timezone.make_aware(date_joined,
                                               timezone=timezone.utc),
        },
        {
            "slug": slug,
            "is_public": False,
            "name": name,
            "city_or_town": city_or_town,
            "country": country,
            "cause_areas": collect_cause_areas(
                global_poverty,
                animal_welfare_and_rights,
                long_term_future,
                cause_prioritisation,
                meta,
                climate_change,
            ),
            "cause_areas_other": cause_areas_other,
            "summary": html.strip_tags(summary),
            "legacy_record": legacy_record,
            **geocode(city_or_town, country),
            **classify_skills(skills),
        },
    ) for (
        email,
        last_login,
        date_joined,
        slug,
        name,
        city_or_town,
        country,
        global_poverty,
        animal_welfare_and_rights,
        long_term_future,
        cause_prioritisation,
        meta,
        climate_change,
        cause_areas_other,
        skills,
        summary,
        legacy_record,
    ) in profile_rows]

    with transaction.atomic():
        profiles_by_legacy_record = {}
        created_by_legacy_record = {}
        for email, user_fields, profile_fields in fields:
            user, user_created = base_models.User.objects.get_or_create(
                email=email, defaults=user_fields)
            if user_created:
                profile = models.Profile.objects.create(user=user,
                                                        **profile_fields)
            else:
                user.date_joined = user_fields["date_joined"]
                user.save()
                profile, profile_created = models.Profile.objects.get_or_create(
                    user=user, defaults=profile_fields)
                if not profile_created:
                    profile.legacy_record = profile_fields["legacy_record"]
                    profile.save()
                    slug = profile_fields["slug"]
                    if profile.slug != slug:
                        redirect_rows.append((profile.legacy_record, slug))
            profiles_by_legacy_record[profile.legacy_record] = profile
            created_by_legacy_record[profile.legacy_record] = user.date_joined
        models.ProfileSlug.objects.bulk_create([
            models.ProfileSlug(
                content_object=profiles_by_legacy_record[legacy_record],
                slug=slug,
                redirect=True,
                created=created_by_legacy_record[legacy_record],
            ) for legacy_record, slug in redirect_rows
            if legacy_record in profiles_by_legacy_record
            and profiles_by_legacy_record[legacy_record].slug != slug
        ])
def handle(self, *args, **options):
    if not options.get('tree'):
        raise management_base.CommandError('--tree is required')
    if not os.path.exists(options['tree']):
        raise management_base.CommandError('File %s does not exist' % options['tree'])
    if not options.get('vector'):
        raise management_base.CommandError('--vector is required')
    if not os.path.exists(options['vector']):
        raise management_base.CommandError('File %s does not exist' % options['vector'])
    force = options.get('force')
    job_status = None
    try:
        job_status = JobStatus.JobStatus.objects.get(
            job_type=JobStatus.TWITTER, job_name=SYNC_DATABASE_JOB)
    except JobStatus.JobStatus.DoesNotExist:
        logger.warn('Job status for %s:%s does not exist',
                    JobStatus.TWITTER, SYNC_DATABASE_JOB)
        if not force:
            return
    now = timezone.localtime(timezone.now())
    # if job_status and not force and job_status.last_finished + datetime.timedelta(hours=STATS_LATENCY) >= now:
    #     # NOTE: If stats were synced more than STATS_LATENCY hours ago we
    #     # should not optimize unless force is given.
    #     logger.warn('Stats were synced more than %s hours ago', STATS_LATENCY)
    #     return
    optimize = options.get('optimize') or False
    tw_account_id = options.get('tw_account_id') or []
    tw_campaign_id = options.get('tw_campaign_id') or []
    campaign_id = options.get('campaign_id') or []
    tw_line_item_id = options.get('tw_line_item_id') or []
    optimize_line_item_id = (frozenset(options.get('optimize_line_item_id') or [])
                             or TW_AUTO_OPTIMIZE_LINE_ITEMS)
    tw_account_id_query = (
        TwitterLineItem.TwitterLineItem.objects_raw.order_by().distinct()
        .filter(status=choices.STATUS_ENABLED,
                tw_campaign_id__status=choices.STATUS_ENABLED,
                tw_campaign_id__tw_account_id__status=choices.STATUS_ENABLED)
        .values_list('tw_campaign_id__tw_account_id', flat=True))
    if tw_campaign_id:
        tw_account_id_query = tw_account_id_query.filter(
            tw_campaign_id__in=tw_campaign_id)
    if tw_account_id:
        tw_account_id_query = tw_account_id_query.filter(
            tw_campaign_id__tw_account_id__in=tw_account_id)
    if tw_line_item_id:
        tw_account_id_query = tw_account_id_query.filter(
            tw_line_item_id__in=tw_line_item_id)
    if campaign_id:
        tw_account_id_query = tw_account_id_query.filter(
            tw_campaign_id__campaign_id__in=campaign_id)
    advertisers_count = tw_account_id_query.count()
    if not advertisers_count:
        return
    workers_count = min(
        options.get('workers') or 2 * multiprocessing.cpu_count(),
        advertisers_count or 1)
    logger.debug('Using %d workers to process %d advertisers',
                 workers_count, advertisers_count)
    queue = multiprocessing.JoinableQueue(maxsize=workers_count)
    workers = []
    for i in xrange(workers_count):
        worker = TwitterAccountOptimizer(queue, optimize, campaign_id,
                                         tw_campaign_id, tw_line_item_id,
                                         optimize_line_item_id,
                                         tree_filename=options['tree'],
                                         vector_filename=options['vector'],
                                         name='worker-{}'.format(i + 1))
        workers.append(worker)
        worker.start()
    for tw_account_id in tw_account_id_query:
        queue.put((now, tw_account_id), block=False)
    queue.close()
    try:
        for worker in workers:
            worker.join()
        # logger.debug('optimisation took %.2f s', (timezone.now().replace(tzinfo=None) - now).total_seconds())
    except KeyboardInterrupt:
        logger.debug('Received keyboard interrupt')
        for worker in workers:
            worker.shutdown = True
def handle(self, *args, **options):
    verbose = int(options.get('verbosity'))
    interval = options.get('interval')
    nstreams = options.get('nstreams')
    types = options.get('types')
    flush = options.get('flush')
    demo = options.get('demo')
    span = options.get('span')
    norealtime = options.get('norealtime')

    if nstreams is None and types is None and not demo and flush:
        datastream.delete_streams()
        return
    elif flush:
        raise base.CommandError(
            "Do you really want to remove all datastream data from the "
            "database? Use only '--flush' parameter.")

    if nstreams is None and types is None and not flush and demo:
        types = DEMO_TYPE
        if span == '':
            span = DEMO_SPAN
    elif demo:
        raise base.CommandError("In demo mode other parameters are fixed.")

    if nstreams is None and types is None:
        nstreams = DEFAULT_NSTREAMS

    if types and check_types.match(types):
        types = split_types.findall(types)
        if nstreams is not None and len(types) != nstreams:
            raise base.CommandError(
                "Number of stream types does not match number of streams.")
        nstreams = len(types)
    elif types:
        raise base.CommandError(
            "Invalid stream types string. Must be a comma separated list of "
            "<int|float|enum>[(start,end)|(enum values)].")

    streams = []
    for i in range(nstreams):
        if types is not None:
            typ = types[i]
        else:
            typ = ('int', '')
        if typ[0] == 'enum':
            value_type = 'nominal'
            downsamplers = ['count']
        elif typ[0] == 'graph':
            value_type = 'graph'
            downsamplers = ['count']
        else:
            value_type = 'numeric'
            downsamplers = datastream.backend.value_downsamplers
        visualization_value_downsamplers = []
        for downsampler in ['mean', 'min', 'max']:
            if downsampler in downsamplers:
                visualization_value_downsamplers.append(downsampler)
        type_constructor, random_function, default_domain = TYPES[typ[0]]
        domain = typ[1] or default_domain
        domain_range = [type_constructor(d) for d in domain.split(',')]
        stream_id = datastream.ensure_stream(
            {'title': 'Stream %d' % i},
            {
                'description': lorem_ipsum.paragraph(),
                'unit_description': 'random, domain: %s' % domain,
                'stream_number': i,
                'visualization': {
                    # NOTE: the original used "typ is 'enum'" / "typ is 'graph'",
                    # identity comparisons of a tuple with a string literal that
                    # were always False; compare the type name instead.
                    'type': 'state' if typ[0] == 'enum' else 'line',
                    'hidden': typ[0] == 'graph',
                    'value_downsamplers': visualization_value_downsamplers,
                    'time_downsamplers': ['mean'],
                    'minimum': domain_range[0] if value_type == 'numeric' else None,
                    'maximum': domain_range[1] if value_type == 'numeric' else None,
                },
            },
            downsamplers,
            datastream.Granularity.Seconds,
            value_type=value_type,
        )
        streams.append((stream_id, typ))

    span = span.split(' ')
    if len(span) == 1 and span[0]:
        span = span[0]
        for val, key in (('days', 'd'), ('hours', 'h'), ('minutes', 'm'),
                         ('seconds', 's')):
            if span[-1] == key:
                try:
                    s = int(span[:-1])
                except ValueError:
                    raise base.CommandError("Time span value must be an integer.")
                span_to = datetime.datetime.now(pytz.utc)
                last_timestamp = self.last_timestamp(streams)
                span_from = max(
                    span_to - datetime.timedelta(**{val: s}),
                    last_timestamp + datetime.timedelta(seconds=interval))
                break
        else:
            raise base.CommandError("Unknown time span unit '%s'." % span[-1])
    elif len(span) == 2:
        try:
            # TODO: Support also timezone in the datetime format
            span_from, span_to = [
                datetime.datetime.strptime(x, '%Y-%m-%dT%H:%M:%S') for x in span
            ]
        except ValueError:
            raise base.CommandError(
                "Use time format 'yyyy-mm-ddThh:mm:ss' (i.e. '2007-03-04T21:08:12').")
    else:
        raise base.CommandError(
            "Invalid time span parameter. It should be one or two "
            "space-delimited values.")

    if span_from is not None and span_to is not None and span_from <= span_to:
        if verbose > 1:
            td = span_to - span_from
            self.stdout.write(
                "Appending %d values from %s to %s.\n" %
                ((td.seconds + td.days * 24 * 3600) // interval * len(streams),
                 span_from, span_to))
        while span_from <= span_to:
            for stream_id, (typ, domain) in streams:
                type_constructor, random_function, default_domain = TYPES[typ]
                value = random_function(*[
                    type_constructor(d)
                    for d in (domain or default_domain).split(',')
                ])
                datastream.append(stream_id, value, span_from)
            span_from += datetime.timedelta(seconds=interval)
        if verbose > 1:
            self.stdout.write("Done. Downsampling.\n")
        datastream.downsample_streams(until=span_to)

    if norealtime:
        return

    if verbose > 1:
        self.stdout.write(
            "Appending real-time value(s) to stream(s) every %s seconds.\n" %
            interval)
    while True:
        for stream_id, (typ, domain) in streams:
            type_constructor, random_function, default_domain = TYPES[typ]
            value = random_function(*[
                type_constructor(d)
                for d in (domain or default_domain).split(',')
            ])
            if verbose > 1:
                self.stdout.write("Appending value '%s' to stream '%s'.\n" %
                                  (value, stream_id))
            datastream.append(stream_id, value)
        datastream.downsample_streams()
        time.sleep(interval)
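# A minimal sketch of the TYPES registry the generator above indexes with
# TYPES[typ[0]]: (type_constructor, random_function, default_domain) triples.
# The concrete entries below are assumptions; only the triple shape is relied on.
import random

TYPES = {
    'int': (int, random.randint, '0,100'),
    'float': (float, random.uniform, '0,1'),
    'enum': (str, lambda *values: random.choice(values), 'a,b,c'),
}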
def import_spreadsheet(fp, verbose=False, raise_on_error=False,
                       interactive=True, parallel=1):
    object_count = sum(v[None].objects.count()
                       for v in SPREADSHEET_MAPPINGS.values())
    registration_count = sum(
        v[None].Registrations.objects.count()
        for v in SPREADSHEET_MAPPINGS.values()
        if getattr(v[None], 'Registrations', None))
    message = """
You have requested an import into the database even though it has {} objects
and {} registrations. This may overwrite or conflict with any pre-existing
entries.

Are you sure you want to do this?

Type 'yes' to continue, or 'no' to cancel:
""".strip('\n').format(object_count, registration_count)
    if (interactive and object_count + registration_count
            and input(message + ' ') != 'yes'):
        raise base.CommandError("Import cancelled.")

    wb = openpyxl.load_workbook(fp, read_only=True, data_only=True)
    total = sum(sheet.max_row - 1 for sheet in wb
                if sheet.title in SPREADSHEET_MAPPINGS)
    bar = progress.bar.Bar(max=total,
                           suffix='%(index).0f of %(max).0f - '
                                  '%(elapsed_td)s / %(eta_td)s')
    try:
        for sheet in wb:
            try:
                mapping = SPREADSHEET_MAPPINGS[sheet.title]
            except KeyError:
                continue
            cls = mapping.pop(None)
            rows = sheet.rows
            column_names = [
                mapping.get(col.value, col.value and col.value.lower())
                for col in next(rows)
            ]

            def save(row):
                if row[0].value in DROP:
                    return
                try:
                    kws = {
                        column_names[cellidx]:
                        (VALUE_MAPS[column_names[cellidx]].get(
                            cell.value, cell.value)
                         if column_names[cellidx] in VALUE_MAPS else cell.value)
                        for cellidx, cell in enumerate(row)
                        if column_names[cellidx]
                    }
                except KeyError:
                    msg = 'error mapping {} {}: {}'.format(
                        sheet.title, kws['id'],
                        json.dumps(
                            {
                                column_names[cellidx]: cell.value
                                for cellidx, cell in enumerate(row)
                                if column_names[cellidx]
                            },
                            indent=2))
                    if raise_on_error:
                        raise base.CommandError(msg)
                    else:
                        print(msg)
                try:
                    kws.update(OVERRIDES[kws['id']])
                except KeyError:
                    pass
                try:
                    cls.objects.create(**kws)
                except (db.Error, exceptions.ValidationError,
                        exceptions.ObjectDoesNotExist) as exc:
                    msg = 'error processing {} {}: {}'.format(
                        sheet.title, kws['id'], json.dumps(kws, indent=2))
                    if raise_on_error:
                        raise base.CommandError(msg)
                    elif verbose:
                        print(msg)
                        traceback.print_exc()
                    else:
                        print(msg)

            if sheet.title == 'state':
                # HACK: work around the fact that the first state refers
                # to the second state, by importing them in reverse order
                for row in reversed(list(itertools.islice(rows, 2))):
                    save(row)
                    bar.next()
            # executing two saves concurrently ensures that we'll
            # typically be preparing the next while waiting for the
            # current one to save in the database
            with concurrent.futures.ThreadPoolExecutor(parallel) as e:
                for f in e.map(save, rows):
                    bar.next()
    finally:
        bar.finish()
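# A sketch of the SPREADSHEET_MAPPINGS shape import_spreadsheet() relies on: the
# None key holds the target model class and the other keys rename spreadsheet
# column headers to model fields. The sheet, headers, and model below are
# invented for illustration:
SPREADSHEET_MAPPINGS = {
    'state': {
        None: models.State,   # model for the 'state' sheet
        'State ID': 'id',     # spreadsheet header -> model field name
        'Name': 'name',
    },
}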