class RawData(models.Model):
    """Stored JSON payload identified by a source file name."""

    source_file = models.CharField(max_length=50)
    # Both defaults are the ``timezone.now`` callable itself (not a call),
    # so the value is produced when the default is needed, not at import.
    date_created = models.DateField(default=timezone.now)
    time_created = models.TimeField(default=timezone.now)
    json_data = json_field.JSONField(lazy=False)
    device_transfer = json_field.JSONField(lazy=False, null=True)

    def __str__(self):
        """Use the source file name as the display value."""
        return self.source_file
class CompatReport(amo.models.ModelBase):
    """One compatibility report row of the ``compatibility_reports`` table."""

    guid = models.CharField(max_length=128)
    version = models.CharField(max_length=128)
    app_guid = models.CharField(max_length=128)
    app_version = models.CharField(max_length=128)
    app_build = models.CharField(max_length=128)
    client_os = models.CharField(max_length=128)
    client_ip = models.CharField(max_length=128)
    comments = models.TextField()
    other_addons = json_field.JSONField()
    works_properly = models.BooleanField()

    class Meta:
        db_table = 'compatibility_reports'

    @staticmethod
    def transformer(addons):
        """Attach ``_compat_counts`` to every object in ``addons``.

        For each add-on the reports with a matching ``guid`` are grouped by
        ``works_properly`` and counted; the add-on then receives a dict with
        the keys ``'success'`` and ``'failure'`` (0 when a group is absent).

        :param addons: iterable of objects exposing a ``guid`` attribute
        :return: None, the add-ons are modified in place
        """
        reports = CompatReport.uncached
        # NOTE(review): this issues one aggregate query per add-on.
        for addon in addons:
            grouped = reports.filter(guid=addon.guid)
            grouped = grouped.values_list('works_properly')
            grouped = grouped.annotate(models.Count('id'))
            # Rows are (works_properly, count) pairs.
            tally = dict(grouped)
            addon._compat_counts = {
                'success': tally.get(True, 0),
                'failure': tally.get(False, 0),
            }
class DataStorage(models.Model):
    """Abstract base model holding a JSON ``schema`` and two timestamps.

    Every hook defined here raises ``NotImplementedError``; the class only
    declares the interface and the shared columns.
    """

    schema = json_field.JSONField(default=dict, null=False, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    modified_at = models.DateTimeField(auto_now=True)

    class Meta:
        abstract = True

    @staticmethod
    def validate(data, schema):
        """Not implemented on the base class."""
        raise NotImplementedError()

    def update(self, data, validate=True):
        """Not implemented on the base class."""
        raise NotImplementedError()

    @property
    def content(self):
        """Not implemented on the base class."""
        raise NotImplementedError()

    def output(self, *args):
        """Not implemented on the base class."""
        raise NotImplementedError()

    @property
    def url(self):
        """Not implemented on the base class."""
        raise NotImplementedError()

    def __str__(self):
        """Return ``"<ClassName> <id>"``."""
        class_name = self.__class__.__name__
        return "{} {}".format(class_name, self.id)
class SQLProduct(models.Model):
    """
    SQL mirror of the couch Product documents.

    Having products in SQL lets StockState and other SQL based queries
    filter out data that belongs to archived products efficiently.
    """

    domain = models.CharField(max_length=255, db_index=True)
    product_id = models.CharField(max_length=100, db_index=True, unique=True)
    name = models.CharField(max_length=100, null=True)
    is_archived = models.BooleanField(default=False)
    code = models.CharField(max_length=100, default='', null=True)
    description = models.TextField(null=True, default='')
    category = models.CharField(max_length=100, null=True, default='')
    program_id = models.CharField(max_length=100, null=True, default='')
    cost = models.DecimalField(max_digits=20, decimal_places=5, null=True)
    units = models.CharField(max_length=100, null=True, default='')
    product_data = json_field.JSONField(default={}, )

    created_at = models.DateTimeField(auto_now_add=True)
    last_modified = models.DateTimeField(auto_now=True)

    def __unicode__(self):
        """Return ``"<name> (<domain>)"``."""
        return u"{} ({})".format(self.name, self.domain)

    def __repr__(self):
        """Return a debugging representation with domain and name."""
        details = (self.domain, self.name)
        return "<SQLProduct(domain=%s, name=%s)>" % details

    @classmethod
    def by_domain(cls, domain):
        """Return the queryset of products whose ``domain`` equals ``domain``."""
        in_domain = cls.objects.filter(domain=domain)
        return in_domain.all()
class AddonPremium(amo.models.ModelBase):
    """Extra columns for an Addon that only matter when it is Premium."""

    addon = models.OneToOneField('addons.Addon')
    price = models.ForeignKey(Price, blank=True, null=True)
    paypal_permissions_token = models.CharField(max_length=255, blank=True)
    currencies = json_field.JSONField(default={})

    class Meta:
        db_table = 'addons_premium'

    def __unicode__(self):
        return u'Premium %s: %s' % (self.addon, self.price)

    def has_price(self):
        """Return True when a price is set and its ``price`` value is truthy."""
        if self.price is None:
            return False
        return bool(self.price.price)

    def get_price(self):
        """Delegate to ``self.price.get_price()``."""
        return self.price.get_price()

    def get_price_locale(self):
        """Delegate to ``self.price.get_price_locale()``."""
        return self.price.get_price_locale()

    def is_complete(self):
        """
        Return True when addon, price, the addon's paypal id and the addon's
        support email are all truthy.
        """
        # The addon attributes are only read once addon and price are known
        # to be truthy, matching a left-to-right ``and`` chain.
        if not (self.addon and self.price):
            return False
        return bool(self.addon.paypal_id and self.addon.support_email)

    def has_permissions_token(self):
        """
        Tell whether a permissions token is stored.

        When 'should_ignore_paypal' is enabled the answer is always True.
        """
        if paypal.should_ignore_paypal():
            return True
        return bool(self.paypal_permissions_token)

    def has_valid_permissions_token(self):
        """
        Tell whether the stored permissions token is valid by asking PayPal.

        When 'should_ignore_paypal' is enabled the answer is always True
        and PayPal is not contacted. Without a stored token the answer is
        False.
        """
        if paypal.should_ignore_paypal():
            return True
        token = self.paypal_permissions_token
        if token:
            return paypal.check_permission(token, ['REFUND'])
        return False

    def supported_currencies(self):
        """
        List the currencies supported by this app.

        Each entry comes from ``self.price.currencies()``; an entry is kept
        when its first element is a key of ``self.currencies`` or equals
        'USD', so USD is kept whenever the price offers it.
        """
        enabled = self.currencies or {}
        supported = []
        for entry in self.price.currencies():
            code = entry[0]
            if code in enabled or code == 'USD':
                supported.append(entry)
        return supported
class DocumentMysql(models.Model):
    """Abstract model with a JSON ``properties`` column, ordered by ``id``."""

    properties = json_field.JSONField(default={})

    class Meta:
        abstract = True
        # "Latest" and the default ordering are both defined by the id.
        get_latest_by = "id"
        ordering = ["id"]
class Result(models.Model):
    """A single result row that belongs to a ``ResultClass``."""

    result_class = models.ForeignKey(ResultClass)
    primary_field_value = models.CharField(max_length=255)
    secret_field_value = models.CharField(max_length=255)
    data = json_field.JSONField()
    results_file = models.FileField(
        blank=True,
        upload_to='results/',
        storage=protected_storage,
    )

    def __unicode__(self):
        """Return the result class followed by the primary field value."""
        parts = (self.result_class, self.primary_field_value)
        return '%s (id : %s)' % parts
class CompatReport(amo.models.ModelBase):
    """One compatibility report row of the ``compatibility_reports`` table."""

    # What was reported on.
    guid = models.CharField(max_length=128)
    version = models.CharField(max_length=128)

    # Application and client the report came with.
    app_guid = models.CharField(max_length=128)
    app_version = models.CharField(max_length=128)
    app_build = models.CharField(max_length=128)
    client_os = models.CharField(max_length=128)
    client_ip = models.CharField(max_length=128)

    # Report content.
    comments = models.TextField()
    other_addons = json_field.JSONField()
    works_properly = models.BooleanField()

    class Meta:
        db_table = 'compatibility_reports'
class Grant(ValidatingModel, models.Model):
    """
    A parameterized membership linking a sub-role (``from_role``) to a
    super-role (``to_role``).

    ``assignment`` holds the parameter values this grant fixes on the
    super-role.
    """

    # Database Fields
    # ---------------

    from_role = models.ForeignKey(
        'Role',
        help_text='The sub-role begin granted membership or permission',
        related_name='memberships_granted',
    )

    to_role = models.ForeignKey(
        'Role',
        help_text='The super-role or permission being given',
        related_name='members',
    )

    assignment = json_field.JSONField(
        help_text='Assignment from parameters (strings) to values (any JSON-compatible value)',
        blank=True,
        default=dict,
    )

    class Meta:
        app_label = 'django_prbac'

    # Methods
    # -------

    def instantiated_to_role(self, assignment):
        """
        Instantiate the super-role with the composed parameter assignment.

        Values from the ``assignment`` argument are used only for keys that
        are among ``self.to_role.parameters``; values stored on this grant
        are applied afterwards and therefore win on conflicts.

        :param assignment: mapping of parameter names to values, may be empty
        :return: whatever ``self.to_role.instantiate`` returns
        """
        composed = {}
        if assignment:
            shared_keys = self.to_role.parameters & set(assignment.keys())
            composed = {name: assignment[name] for name in shared_keys}
        if self.assignment:
            composed.update(self.assignment)
        return self.to_role.instantiate(composed)

    def __repr__(self):
        details = (self.from_role, self.to_role, self.assignment)
        return 'Grant(from_role=%r, to_role=%r, assignment=%r)' % details
class CompatReport(amo.models.ModelBase):
    """One compatibility report row of the ``compatibility_reports`` table."""

    guid = models.CharField(max_length=128)
    version = models.CharField(max_length=128)
    app_guid = models.CharField(max_length=128)
    app_version = models.CharField(max_length=128)
    app_build = models.CharField(max_length=128)
    client_os = models.CharField(max_length=128)
    client_ip = models.CharField(max_length=128)
    comments = models.TextField()
    other_addons = json_field.JSONField()
    works_properly = models.BooleanField()

    class Meta:
        db_table = 'compatibility_reports'

    @classmethod
    def get_counts(cls, guid):
        """Count the reports for ``guid`` split by ``works_properly``.

        :param guid: value matched against the ``guid`` column
        :return: dict with the keys ``'success'`` and ``'failure'``; a key
            is 0 when no report exists for that outcome
        """
        # The first argument of a classmethod is the class, so it is named
        # ``cls`` and used for the query instead of a hard-coded class name.
        per_outcome = (cls.objects
                       .filter(guid=guid)
                       .values_list('works_properly')
                       .annotate(models.Count('id')))
        # Rows are (works_properly, count) pairs.
        works = dict(per_outcome)
        return {
            'success': works.get(True, 0),
            'failure': works.get(False, 0),
        }
class Organism(models.Model):
    """Abstract base model tied to a ``community`` through a generic relation.

    It carries a JSON ``schema`` and two timestamps. Every hook defined here
    raises ``NotImplementedError``.
    """

    # Generic relation: ``community_type`` + ``community_id`` identify the
    # related object.
    community = GenericForeignKey(ct_field="community_type",
                                  fk_field="community_id")
    community_type = models.ForeignKey(ContentType, related_name="+")
    community_id = models.PositiveIntegerField()

    # BUG: schema does not throw IntegrityError on None
    schema = json_field.JSONField(default=None, null=False, blank=False)

    created_at = models.DateTimeField(auto_now_add=True)
    modified_at = models.DateTimeField(auto_now=True)

    class Meta:
        abstract = True
        get_latest_by = "created_at"
        ordering = ["created_at"]

    @staticmethod
    def validate(data, schema):
        """Not implemented on the base class."""
        raise NotImplementedError()

    def update(self, data, validate=True):
        """Not implemented on the base class."""
        raise NotImplementedError()

    @property
    def content(self):
        """Not implemented on the base class."""
        raise NotImplementedError()

    def output(self, *args):
        """Not implemented on the base class."""
        raise NotImplementedError()

    @property
    def url(self):
        """Not implemented on the base class."""
        raise NotImplementedError()

    def __str__(self):
        """Return ``"<ClassName> <id>"``."""
        class_name = self.__class__.__name__
        return "{} {}".format(class_name, self.id)
class ResultClass(models.Model):
    """A titled set of results with an uploaded CSV and field definitions."""

    create_date = models.DateTimeField(auto_now_add=True)
    modify_date = models.DateTimeField(auto_now=True)

    slug = models.SlugField(unique=True, blank=True)
    title = models.CharField(max_length=255)
    description = models.TextField(blank=True)

    data_csv = models.FileField(
        upload_to=resultclass_csv_upload_to,
        storage=protected_storage,
    )
    fields = json_field.JSONField()
    primary_field = models.CharField(max_length=255)
    secret_field = models.CharField(max_length=255)

    enabled = models.BooleanField()

    def save(self, *args, **kwargs):
        """Fill an empty ``slug`` from the title, then save as usual."""
        # An existing slug is kept; only a falsy one is replaced.
        self.slug = self.slug or slugify(self.title)
        super(ResultClass, self).save(*args, **kwargs)

    def __unicode__(self):
        return self.title

    def get_absolute_url(self):
        """Reverse the ``result-class-detail`` route for this slug."""
        return reverse('result-class-detail', kwargs=dict(slug=self.slug))
class Individual(Organism):
    """An Organism whose data is a single JSON ``properties`` mapping.

    Item access, ``items``, ``keys`` and ``values`` are forwarded to
    ``properties``.
    """

    collective = models.ForeignKey('Collective', null=True)
    properties = json_field.JSONField(default={})

    identity = models.CharField(max_length=255, blank=True, null=True,
                                db_index=True)
    index = models.SmallIntegerField(blank=True, null=True)

    def __getitem__(self, key):
        return self.properties[key]

    def __setitem__(self, key, value):
        self.properties[key] = value

    @property
    def url(self):
        """Reverse the content route for this Individual.

        :raises ValueError: when the instance has no id yet
        """
        if self.id:
            # TODO: make version aware
            return reverse("v1:individual-content", args=[self.id])
        raise ValueError("Can't get url for unsaved Individual")

    @staticmethod
    def validate(data, schema):
        """
        Validate properties against a JSON schema.

        :param data: a dict, or an Individual whose ``properties`` are used
        :param schema: The JSON schema to use for validation.
        :raises ValidationError: when ``data`` has another type or when the
            schema validation fails; in the latter case the exception
            carries the failing schema in its ``schema`` attribute
        :return: None
        """
        if not isinstance(data, (dict, Individual)):
            raise ValidationError(
                "An Individual can only work with a dict as data and got {} instead"
                .format(type(data)))
        candidate = data if isinstance(data, dict) else data.properties
        try:
            jsonschema.validate(candidate, schema)
        except SchemaValidationError as exc:
            # Translate the jsonschema error into a Django ValidationError.
            djang_exception = ValidationError(exc.message)
            djang_exception.schema = exc.schema
            raise djang_exception

    def update(self, data, validate=True):
        """
        Merge new data into the properties and save.

        :param data: mapping to merge; for a list or tuple only its first
            element is used
        :param validate: (optional) whether to validate the merged
            properties against ``self.schema`` before saving (yes by default)
        :return: Updated content
        """
        incoming = data[0] if isinstance(data, (list, tuple)) else data
        self.properties.update(incoming)
        if validate:
            self.validate(self.properties, self.schema)
        self.save()
        return self.content

    @property
    def content(self):
        """
        Returns the content of this Individual

        :return: new dict with every property whose key does not start
            with an underscore
        """
        public = {}
        for key, value in self.properties.items():
            if key.startswith('_'):
                continue
            public[key] = value
        return public

    @property
    def json_content(self):
        return self.get_properties_json()

    def output(self, *args):
        """Apply ``output_from_content`` to this Individual's properties."""
        return self.output_from_content(self.properties, *args)

    @staticmethod
    def output_from_content(content, *args):
        """
        Build output from ``content`` following one or more format specs.

        A string spec is resolved with ``reach``; a list spec is resolved
        element-wise, a dict spec value-wise and a falsy spec is returned
        unchanged.

        :param content: the data to read from
        :param args: format specs (string, list or dict)
        :raises AssertionError: for a truthy spec of any other type
        """
        if len(args) > 1:
            # NOTE(review): on Python 3 ``map`` is lazy, so callers receive
            # an iterator here rather than a list -- confirm this is wanted.
            return map(Individual.output_from_content, repeat(content), args)

        spec = args[0]
        if not spec:
            return spec
        if isinstance(spec, str):
            return reach(spec, content)
        if isinstance(spec, list):
            resolved = Individual.output_from_content(content, *spec)
            # A single-element list keeps its list wrapper.
            return resolved if len(spec) > 1 else [resolved]
        if isinstance(spec, dict):
            return {
                name: Individual.output_from_content(content, sub_spec)
                for name, sub_spec in spec.items()
            }
        raise AssertionError(
            "Expected a string, list or dict as argument got {} instead".
            format(type(spec)))

    def items(self):
        return self.properties.items()

    def keys(self):
        return self.properties.keys()

    def values(self):
        return self.properties.values()

    def clean(self):
        """Let the collective, when there is one, influence this Individual."""
        if self.collective:
            self.collective.influence(self)
class SQLLocation(MPTTModel):
    """Tree-structured (MPTT) SQL model of a location inside a domain."""

    domain = models.CharField(max_length=255, db_index=True)
    name = models.CharField(max_length=100, null=True)
    location_id = models.CharField(max_length=100, db_index=True, unique=True)
    location_type = models.ForeignKey(LocationType)
    site_code = models.CharField(max_length=255)
    external_id = models.CharField(max_length=255, null=True)
    metadata = json_field.JSONField(default={})
    created_at = models.DateTimeField(auto_now_add=True)
    last_modified = models.DateTimeField(auto_now=True)
    is_archived = models.BooleanField(default=False)
    latitude = models.DecimalField(max_digits=20, decimal_places=10, null=True)
    longitude = models.DecimalField(max_digits=20, decimal_places=10, null=True)
    parent = TreeForeignKey('self', null=True, blank=True,
                            related_name='children')

    # Use getter and setter below to access this value
    # since stocks_all_products can cause an empty list to
    # be what is stored for a location that actually has
    # all products available.
    _products = models.ManyToManyField(SQLProduct, null=True)
    stocks_all_products = models.BooleanField(default=True)

    supply_point_id = models.CharField(max_length=255, db_index=True,
                                       unique=True, null=True)

    objects = LocationManager()

    class Meta:
        unique_together = ('domain', 'site_code',)

    @property
    def get_id(self):
        return self.location_id

    @property
    def products(self):
        """
        Products relevant to this location.

        When ``stocks_all_products`` is set, every product of the domain is
        returned; otherwise only the explicitly linked ``_products``.
        """
        if not self.stocks_all_products:
            return self._products.all()
        return SQLProduct.by_domain(self.domain)

    @products.setter
    def products(self, value):
        # this will set stocks_all_products to true if the user
        # has added all products in the domain to this location
        chosen = set(value)
        everything = set(SQLProduct.by_domain(self.domain))
        self.stocks_all_products = (chosen == everything)

        self._products = value

    def __unicode__(self):
        return u"{} ({})".format(self.name, self.domain)

    def __repr__(self):
        details = (self.domain, self.name)
        return "<SQLLocation(domain=%s, name=%s)>" % details

    @property
    def display_name(self):
        return u"{} [{}]".format(self.name, self.location_type.name)

    def archived_descendants(self):
        """
        Returns the descendants of this location that have
        ``is_archived`` set.
        """
        descendants = self.get_descendants()
        return descendants.filter(is_archived=True)

    def child_locations(self, include_archive_ancestors=False):
        """
        Returns this location's children, passed through
        ``_filter_for_archived``.
        """
        return _filter_for_archived(self.get_children(),
                                    include_archive_ancestors)

    @classmethod
    def root_locations(cls, domain, include_archive_ancestors=False):
        """
        Returns the root nodes of ``domain``, passed through
        ``_filter_for_archived``.
        """
        domain_roots = cls.objects.root_nodes().filter(domain=domain)
        return _filter_for_archived(domain_roots, include_archive_ancestors)

    def get_path_display(self):
        """Join the names of all ancestors and this location with '/'."""
        lineage = self.get_ancestors(include_self=True)
        return '/'.join(lineage.values_list('name', flat=True))

    def _make_group_object(self, user_id, case_sharing):
        """
        Build an ``UnsavableGroup`` describing this location.

        :param user_id: id placed in the group's users; no users when falsy
        :param case_sharing: True builds the case sharing group, False the
            reporting group
        """
        from corehq.apps.groups.models import UnsavableGroup

        group = UnsavableGroup()
        group.domain = self.domain
        group.users = [user_id] if user_id else []
        group.last_modified = datetime.utcnow()

        if case_sharing:
            group.name = self.get_path_display() + '-Cases'
            group._id = self.location_id
            group.case_sharing = True
            group.reporting = False
        else:
            # reporting groups
            group.name = self.get_path_display()
            group._id = LOCATION_REPORTING_PREFIX + self.location_id
            group.case_sharing = False
            group.reporting = True

        group.metadata = {
            'commcare_location_type': self.location_type.name,
            'commcare_location_name': self.name,
        }
        # Location metadata is copied over under a prefixed key.
        for meta_key, meta_value in self.metadata.items():
            group.metadata['commcare_location_' + meta_key] = meta_value

        return group

    def case_sharing_group_object(self, user_id=None):
        """
        Returns a fake group object that cannot be saved.

        It gives users access via case sharing groups, so that no real
        group has to be managed or hidden for every location.
        """
        return self._make_group_object(user_id, case_sharing=True)

    def reporting_group_object(self, user_id=None):
        """
        Returns a fake group object that cannot be saved.

        Same idea as ``case_sharing_group_object``, but for
        reporting groups.
        """
        return self._make_group_object(user_id, case_sharing=False)

    @property
    @memoized
    def couch_location(self):
        return Location.get(self.location_id)

    def is_direct_ancestor_of(self, location):
        """Tell whether this location is ``location`` or one of its ancestors."""
        lineage = location.get_ancestors(include_self=True)
        return lineage.filter(pk=self.pk).exists()

    @classmethod
    def by_domain(cls, domain):
        return cls.objects.filter(domain=domain)

    @property
    def path(self):
        # This exists for backwards compatability with couch locations
        lineage = self.get_ancestors(include_self=True)
        return list(lineage.values_list('location_id', flat=True))
class HttpResource(Resource): """ You can extend from this base class to declare a ``Resource`` that gathers data from a HTTP(S) source. For instance websites and (REST)API's This class is a wrapper around the requests library and provides: * easy follow up of continuation URL's in responses * handle authentication through Datagrowth configs * cached responses when retrieving data a second time Response headers, body and status get stored in the database as well as an abstraction of the request. Any authentication data gets stripped before storage in the database. """ # Identification data_hash = models.CharField(max_length=255, db_index=True, default="") # Getting data request = json_field.JSONField(default=None) # Storing data head = json_field.JSONField(default="{}") body = models.TextField(default=None, null=True, blank=True) # Class constants that determine behavior URI_TEMPLATE = "" PARAMETERS = {} DATA = {} HEADERS = {} FILE_DATA_KEYS = [] GET_SCHEMA = {"args": {}, "kwargs": {}} POST_SCHEMA = {"args": {}, "kwargs": {}} ####################################################### # PUBLIC FUNCTIONALITY ####################################################### # The get and post methods are the ways to interact # with the external resource. # Success and content are convenient to handle the results def send(self, method, *args, **kwargs): """ This method handles the gathering of data and updating the model based on the resource configuration. If the data has been retrieved before it will load the data from cache instead. Specify ``cache_only`` in your config if you want to prevent any HTTP requests. The data might be missing in that case. You must specify the method that the resource will be using to get the data. Currently this can be the "get" and "post" HTTP verbs. Any arguments will be passed to ``URI_TEMPLATE`` to format it. Any keyword arguments will be passed as a data dict to the request. 
If a keyword is listed in the ``FILE_DATA_KEYS`` attribute on a HttpResource, then the value of that argument is expected to be a file path relative to the ``DATAGROWTH_MEDIA_ROOT``. The value of that keyword will be replaced with the file before making the request. :param method: "get" or "post" depending on which request you want your resource to execute :param args: arguments that will get merged into the ``URI_TEMPLATE`` :param kwargs: keywords arguments that will get send as data :return: HttpResource """ if not self.request: self.request = self._create_request(method, *args, **kwargs) self.uri = HttpResource.uri_from_url(self.request.get("url")) self.data_hash = HttpResource.hash_from_data( self.request.get(HttpResource._get_data_key(self.request))) else: self.validate_request(self.request) self.clean() # sets self.uri and self.data_hash based on request try: resource = self.__class__.objects.get(uri=self.uri, data_hash=self.data_hash) except self.DoesNotExist: if self.config.cache_only: raise DGResourceDoesNotExist( "Could not retrieve resource from cache", resource=self) resource = self if self.config.cache_only: return resource try: self.validate_request(resource.request) except ValidationError: if resource.id: resource.delete() resource = self if resource.success: return resource resource.request = resource.request_with_auth() resource._send() resource.handle_errors() return resource def get(self, *args, **kwargs): """ This method calls ``send`` with "get" as a method. See the ``send`` method for more information. :param args: arguments that will get merged into the URI_TEMPLATE :param kwargs: keywords arguments that will get send as data :return: HttpResource """ return self.send("get", *args, **kwargs) def post(self, *args, **kwargs): """ This method calls ``send`` with "post" as a method. See the ``send`` method for more information. 
:param args: arguments that will get merged into the URI_TEMPLATE :param kwargs: keywords arguments that will get send as data :return: HttpResource """ return self.send("post", *args, **kwargs) @property def success(self): """ Returns True if status is within HTTP success range :return: Boolean """ return self.status is not None and 200 <= self.status < 209 @property def content(self): """ After a successful ``get`` or ``post`` call this method reads the ContentType header from the HTTP response. Depending on the MIME type it will return the content type and the parsed data. * For a ContentType of application/json data will be a python structure * For a ContentType of text/html or text/xml data will be a BeautifulSoup instance Any other ContentType will result in None. You are encouraged to overextend ``HttpResource`` to handle your own data types. :return: content_type, data """ if self.success: content_type = self.head.get("content-type", "unknown/unknown").split(';')[0] if content_type == "application/json": return content_type, json.loads(self.body) elif content_type == "text/html": return content_type, BeautifulSoup(self.body, "html5lib") elif content_type == "text/xml": return content_type, BeautifulSoup(self.body, "lxml") else: return content_type, None return None, None ####################################################### # CREATE REQUEST ####################################################### # A set of methods to create a request dictionary # The values inside are passed to the requests library # Override parameters method to set dynamic parameters def _create_request(self, method, *args, **kwargs): self._validate_input(method, *args, **kwargs) data = self.data(**kwargs) if not method == "get" else None headers = requests.utils.default_headers() headers["User-Agent"] = "{}; {}".format(self.config.user_agent, headers["User-Agent"]) headers.update(self.headers()) request = { "args": args, "kwargs": kwargs, "method": method, "url": 
self._create_url(*args), "headers": dict(headers) } data_key = self._get_data_key(request, headers) request[data_key] = data return self.validate_request(request, validate_input=False) def _create_url(self, *args): url_template = copy(self.URI_TEMPLATE) variables = self.variables(*args) url = URLObject(url_template.format(*variables["url"])) params = url.query.dict params.update(self.parameters(**variables)) url = url.set_query_params(params) return str(url) def headers(self): """ Returns the dictionary that should be used as headers for the request the resource will make. By default this is the dictionary from the ``HEADERS`` attribute. :return: (dict) a dictionary representing HTTP headers """ return self.HEADERS def parameters(self, **kwargs): """ Returns the dictionary that should be used as HTTP query parameters for the request the resource will make. By default this is the dictionary from the ``PARAMETERS`` attribute. You may need to override this method. It will receive the return value of the variables method as kwargs. :param kwargs: variables returned by the variables method (ignored by default) :return: (dict) a dictionary representing HTTP query parameters """ return self.PARAMETERS def data(self, **kwargs): """ Returns the dictionary that will be used as HTTP body for the request the resource will make. By default this is the dictionary from the ``DATA`` attribute updated with the kwargs from the input from the ``send`` method. :param kwargs: keyword arguments from the input :return: """ data = dict(self.DATA) data.update(**kwargs) return data def variables(self, *args): """ Parsers the input variables and returns a dictionary with a "url" key. This key contains a list of variables that will be used to format the ``URI_TEMPLATE``. 
:return: (dict) a dictionary where the input variables are available under names """ args = args or (self.request.get("args") if self.request else tuple()) return {"url": args} def validate_request(self, request, validate_input=True): """ Validates a dictionary that represents a request that the resource will make. Currently it checks the method, which should be "get" or "post" and whether the current data (if any) is still valid or has expired. Apart from that it validates input which should adhere to the JSON schema defined in the ``GET_SCHEMA`` or ``POST_SCHEMA`` attributes :param request: (dict) the request dictionary :param validate_input: (bool) whether to validate input :return: """ if self.purge_at is not None and self.purge_at <= datetime.now(): raise ValidationError( "Resource is no longer valid and will get purged") # Internal asserts about the request assert isinstance(request, dict), "Request should be a dictionary." method = request.get("method") assert method, "Method should not be falsy." assert method in ["get", "post"], \ "{} is not a supported resource method.".format(request.get("method")) # FEATURE: allow all methods if validate_input: self._validate_input(method, *request.get("args", tuple()), **request.get("kwargs", {})) # All is fine :) return request def _validate_input(self, method, *args, **kwargs): """ Will validate the args and kwargs against the JSON schema set on ``GET_SCHEMA`` or ``POST_SCHEMA``, depending on the HTTP method used. 
:param method: the HTTP method to validate :param args: arguments to validate :param kwargs: keyword arguments to validate :return: """ schemas = self.GET_SCHEMA if method == "get" else self.POST_SCHEMA # FEATURE: allow all methods args_schema = schemas.get("args") kwargs_schema = schemas.get("kwargs") if args_schema is None and len(args): raise ValidationError( "Received arguments for request where there should be none.") if kwargs_schema is None and len(kwargs): raise ValidationError( "Received keyword arguments for request where there should be none." ) if args_schema: try: jsonschema.validate(list(args), args_schema) except SchemaValidationError as ex: raise ValidationError("{}: {}".format(self.__class__.__name__, str(ex))) if kwargs_schema: try: jsonschema.validate(kwargs, kwargs_schema) except SchemaValidationError as ex: raise ValidationError("{}: {}".format(self.__class__.__name__, str(ex))) def _format_data(self, data): """ Will replace any keys that are present in data and the ``FILE_DATA_KEYS`` class attribute with file descriptors. The values of any key is presumed to be a path to a file relative to the ``DATAGROWTH_MEDIA_ROOT``. :param data: (dict) data where some file paths may need to be replaced with actual files :return: (dict) the formatted data """ if data is None: return None, None files = {} for file_key in self.FILE_DATA_KEYS: relative_path = data.get(file_key, None) if relative_path: file_path = os.path.join( datagrowth_settings.DATAGROWTH_MEDIA_ROOT, relative_path) files[file_key] = open(file_path, "rb") data = {key: value for key, value in data.items() if key not in files} # data copy without "files" files = files or None return data, files ####################################################### # AUTH LOGIC ####################################################### # Methods to enable auth for the resource. # Override auth_parameters to provide authentication. 
def auth_headers(self): """ Returns the dictionary that should be used as authentication headers for the request the resource will make. Override this method in your own class to add authentication. By default this method returns an empty dictionary meaning there are no authentication headers. :return: (dict) dictionary with headers to add to requests """ return {} def auth_parameters(self): """ Returns the dictionary that should be used as authentication parameters for the request the resource will make. Override this method in your own class to add authentication. By default this method returns an empty dictionary meaning there are no authentication parameters. :return: (dict) dictionary with parameters to add to requests """ return {} def request_with_auth(self): """ Get the ``request`` that this resource will make with authentication headers and parameters added. Override ``auth_headers`` and/or ``auth_parameters`` to provide the headers and/or parameters. :return: (dict) a copy of the ``request`` dictionary with authentication added """ url = URLObject(self.request.get("url")) params = url.query.dict params.update(self.auth_parameters()) url = url.set_query_params(params) request = deepcopy(self.request) request["url"] = str(url) request["headers"].update(self.auth_headers()) return request def request_without_auth(self): """ Get the ``request`` that this resource will make with authentication headers and parameters from ``auth_headers`` and ``auth_parameters`` removed. 
:return: (dict) a copy of the ``request`` dictionary with authentication removed """ url = URLObject(self.request.get("url")) url = url.del_query_params(self.auth_parameters()) request = deepcopy(self.request) request["url"] = str(url) for key in self.auth_headers().keys(): if key in request["headers"]: del request["headers"][key] return request ####################################################### # NEXT LOGIC ####################################################### # Methods to act on continuation for a resource # Override next_parameters to provide auto continuation def next_parameters(self): """ Returns the dictionary that should be used as HTTP query parameters for the continuation request a resource can make. By default this is an empty dictionary. Override this method and return the correct parameters based on the ``content`` of the resource. :return: (dict) a dictionary representing HTTP continuation query parameters """ return {} def create_next_request(self): """ Creates and returns a dictionary that represents a continuation request. Often a source will indicate how to continue gather more data. By overriding the ``next_parameters`` developers can indicate how continuation requests can be made. Calling this method will build a new request using these parameters. :return: (dict) a dictionary representing a continuation request to be made """ if not self.success or not self.next_parameters(): return None url = URLObject(self.request.get("url")) params = url.query.dict params.update(self.next_parameters()) url = url.set_query_params(params) request = deepcopy(self.request) request["url"] = str(url) return request ####################################################### # PROTECTED METHODS ####################################################### # Some internal methods for the get and post methods. 
def _send(self): """ Does a get or post on the computed link Will set storage fields to returned values """ assert self.request and isinstance(self.request, dict), \ "Trying to make request before having a valid request dictionary." method = self.request.get("method") form_data = self.request.get("data") if not method == "get" else None form_data, files = self._format_data(form_data) json_data = self.request.get("json") if not method == "get" else None request = requests.Request(method=method, url=self.request.get("url"), headers=self.request.get("headers"), data=form_data, json=json_data, files=files) preq = self.session.prepare_request(request) try: response = self.session.send( preq, proxies=datagrowth_settings.DATAGROWTH_REQUESTS_PROXIES, verify=datagrowth_settings.DATAGROWTH_REQUESTS_VERIFY, timeout=self.timeout) except requests.exceptions.SSLError: self.set_error(496, connection_error=True) return except requests.Timeout: self.set_error(504, connection_error=True) return except (requests.ConnectionError, IOError): self.set_error(502, connection_error=True) return except UnicodeDecodeError: self.set_error(600, connection_error=True) return self._update_from_results(response) def _update_from_results(self, response): self.head = dict(response.headers.lower_items()) self.status = response.status_code self.body = response.content if isinstance(response.content, str) else \ response.content.decode("utf-8", "replace") def handle_errors(self): """ Raises exceptions upon error statuses Override this method to raise exceptions for your own error states. By default it raises the ``DGHttpError40X`` and ``DGHttpError50X`` exceptions for statuses. 
""" class_name = self.__class__.__name__ if self.status >= 500: message = "{} > {} \n\n {}".format(class_name, self.status, self.body) raise DGHttpError50X(message, resource=self) elif self.status >= 400: message = "{} > {} \n\n {}".format(class_name, self.status, self.body) raise DGHttpError40X(message, resource=self) else: return True @staticmethod def _get_data_key(request, headers=None): """ This method returns which key should be used when sending data through the requests library. A JSON request requires the "json" key while other requests require "data". :param request: (dict) a dictionary representing a request :param headers: (dict) a dictionary representing request headers :return: key to use when passing data to the requests library """ if "data" in request: return "data" elif "json" in request: return "json" elif headers: return "json" if headers.get( "Content-Type") == "application/json" else "data" raise AssertionError( "Could not determine data_key for request {} or headers {}".format( request, headers)) ####################################################### # DJANGO MODEL ####################################################### # Methods and properties to tweak Django def __init__(self, *args, **kwargs): self.session = kwargs.pop("session", requests.Session()) self.timeout = kwargs.pop("timeout", 30) super(HttpResource, self).__init__(*args, **kwargs) def clean(self): if self.request and not self.uri: uri_request = self.request_without_auth() self.uri = HttpResource.uri_from_url(uri_request.get("url")) if self.request and not self.data_hash: uri_request = self.request_without_auth() self.data_hash = HttpResource.hash_from_data( uri_request.get("data")) super().clean() ####################################################### # CONVENIENCE ####################################################### # Some static methods to provide standardization @staticmethod def uri_from_url(url): """ Given a URL this method will strip the protocol and sort the 
parameters. That way a database lookup for a URL will always return URL's that logically match that URL. :param url: the URL to normalize to URI :return: a normalized URI suitable for lookups """ url = URLObject(url) params = sorted(url.query.dict.items(), key=lambda item: item[0]) url = url.with_query(urlencode(params)) return str(url).replace(url.scheme + "://", "") @staticmethod def hash_from_data(data): """ Given a dictionary will recursively sort and JSON dump the keys and values of that dictionary. The end result is given to SHA-1 to create a hash, that is unique for that data. This hash can be used for a database lookup to find earlier requests that send the same data. :param data: (dict) a dictionary of the data to be hashed :return: the hash of the data """ if not data: return "" payload = [] for key, value in data.items(): if not isinstance(value, dict): payload.append((key, value)) else: payload.append((key, HttpResource.hash_from_data(value))) payload.sort(key=lambda item: item[0]) hash_payload = json.dumps(payload).encode("utf-8") hsh = hashlib.sha1() hsh.update(hash_payload) return hsh.hexdigest() @staticmethod def parse_content_type(content_type, default_encoding="utf-8"): """ Given a HTTP ContentType header will return the mime type and the encoding. If no encoding is found the default encoding gets returned. :param content_type: (str) the HTTP ContentType header :param default_encoding: (str) the default encoding when :return: mime_type, encoding """ match = re.match( "(?P<mime_type>[A-Za-z]+/[A-Za-z]+);? ?(charset=(?P<encoding>[A-Za-z0-9\-]+))?", content_type) if match is None: raise ValueError("Could not parse content_type") return match.group( "mime_type"), match.group("encoding") or default_encoding def set_error(self, status, connection_error=False): """ Sets the given status on the HttpResource. When dealing with connection_errors it sets valid defaults. 
:param status: (int) the error status from the response :param connection_error: (bool) whether the error occurred during a connection error :return: """ if connection_error: self.head = {} self.body = "" self.status = status class Meta: abstract = True
class ShellResource(Resource):
    """
    You can extend from this base class to declare a ``Resource`` that gathers data from any shell command.

    This class is a wrapper around the subprocess module and provides:

    * cached responses when retrieving data a second time

    The resource stores the stdin, stdout and stderr from commands in the database
    as well as an abstraction of the command.
    """

    # Getting data
    command = json_field.JSONField(default=None)
    stdin = models.TextField(default=None, null=True, blank=True)

    # Storing data
    stdout = models.TextField(default=None, null=True, blank=True)
    stderr = models.TextField(default=None, null=True, blank=True)

    # Class constants that determine behavior
    CMD_TEMPLATE = []
    FLAGS = {}
    VARIABLES = {}
    DIRECTORY_SETTING = None
    CONTENT_TYPE = "text/plain"
    SCHEMA = {"arguments": {}, "flags": {}}

    #######################################################
    # PUBLIC FUNCTIONALITY
    #######################################################
    # Call run to execute the command
    # Success and content are to handle the results
    # Override transform to manipulate command results

    def run(self, *args, **kwargs):
        """
        This method handles the gathering of data and updating the model based on the resource configuration.
        If the data has been retrieved before it will load the data from cache instead.
        Specify ``cache_only`` in your config if you want to prevent any execution of commands.
        The data might be missing in that case.

        Any arguments will be passed to ``CMD_TEMPLATE`` to format it.

        Any keyword arguments will be parsed into command flags by using the ``FLAGS`` attribute.
        The parsed flags will be inserted into ``CMD_TEMPLATE`` where ever the ``CMD_FLAGS`` value is present.

        :param args: get passed on to the command
        :param kwargs: get parsed into flags before being passed on to the command
        :return: the resource holding the results, which is either self or an instance loaded from cache
        :raises DGResourceDoesNotExist: when ``cache_only`` is set and nothing was found in cache
        """
        if not self.command:
            self.command = self._create_command(*args, **kwargs)
            self.uri = self.uri_from_cmd(self.command.get("cmd"))
        else:
            self.validate_command(self.command)

        self.clean()  # sets self.uri
        resource = None
        try:
            resource = self.__class__.objects.get(uri=self.uri, stdin=self.stdin)
        except self.DoesNotExist:
            if self.config.cache_only:
                raise DGResourceDoesNotExist("Could not retrieve resource from cache", resource=self)
            resource = self

        if self.config.cache_only:
            return resource

        # NOTE(review): validation runs on self, so the expiry check looks at
        # self.purge_at rather than at the purge_at of a cached resource — confirm that is intended.
        try:
            self.validate_command(resource.command)
        except ValidationError:
            if resource.id:
                resource.delete()
            resource = self

        if resource.success:
            return resource

        resource._run()
        resource.handle_errors()
        return resource

    @property
    def success(self):
        """
        Returns True if exit code is 0 and there is some stdout
        """
        return self.status == 0 and bool(self.stdout)

    @property
    def content(self):
        """
        After a successful ``run`` call this method passes stdout from the command through the ``transform`` method.
        It then returns the value of the ``CONTENT_TYPE`` attribute as content type
        and whatever transform returns as data

        :return: content_type, data (both None when the resource is not a success)
        """
        if not self.success:
            return None, None
        return self.CONTENT_TYPE, self.transform(self.stdout)

    def transform(self, stdout):
        """
        Override this method for particular commands.
        It takes the stdout from the command and transforms it into useful output for other components.
        One use case could be to clean out log lines from the output.

        :param stdout: the stdout from the command
        :return: transformed stdout
        """
        return stdout

    def environment(self, *args, **kwargs):
        """
        You can specify environment variables for the command based on the input to ``run``
        by overriding this method. The input from ``run`` is passed down to this method,
        based on this a dictionary should get returned containing the environment variables
        or None if no environment should be set.

        By default this method returns the ``VARIABLES`` attribute without making changes to it.

        :param args: arguments from the ``run`` command
        :param kwargs: keyword arguments from the ``run`` command
        :return: a dictionary with environment variables or None
        """
        if not self.VARIABLES:
            return None
        else:
            return self.VARIABLES

    def debug(self):
        """
        A method that prints to stdout the command that will get executed by the ``ShellResource``.
        This is mostly useful for debugging during development.
        """
        print(subprocess.list2cmdline(self.command.get("cmd", [])))

    #######################################################
    # CREATE COMMAND
    #######################################################
    # A set of methods to create a command dictionary
    # The values inside are passed to the subprocess library

    def variables(self, *args):
        """
        Parses the input variables and returns a dictionary with an "input" key.
        This key contains a list of variables that will be used to format the ``CMD_TEMPLATE``.
        When no arguments are given the arguments stored on the command (if any) are used.

        :return: (dict) a dictionary where the input variables are available under names
        """
        args = args or (self.command["args"] if self.command else tuple())
        return {"input": args}

    def _create_command(self, *args, **kwargs):
        """
        Builds the command dictionary from ``CMD_TEMPLATE``, ``FLAGS`` and the given input.

        :param args: values that replace the format fields in ``CMD_TEMPLATE``, in order
        :param kwargs: values for the flags declared in ``FLAGS``, other keys are ignored
        :return: (dict) a validated command with the "args", "kwargs", "cmd" and "flags" keys
        :raises ValidationError: when the input is invalid or there are fewer arguments than format fields
        """
        self._validate_input(*args, **kwargs)

        # First we format the command template
        formatter = string.Formatter()
        arguments = iter(args)
        cmd = []
        for part in self.CMD_TEMPLATE:
            field_count = sum(
                1 for _, field_name, _, _ in formatter.parse(part)
                if field_name is not None
            )
            # Parts without fields are left untouched, they do not pass through format at all
            if field_count:
                values = []
                for _ in range(field_count):
                    try:
                        values.append(next(arguments))
                    except StopIteration:
                        raise ValidationError(
                            "{}: not enough arguments to format the command template.".format(
                                self.__class__.__name__
                            )
                        )
                # All fields of a part get formatted in one go,
                # formatting field by field breaks on parts with more than one field.
                part = part.format(*values)
            cmd.append(part)

        # Then we set the flags
        flags = ""
        try:
            flags_index = cmd.index("CMD_FLAGS")
        except ValueError:
            flags_index = None
        if flags_index is not None:
            for key, value in kwargs.items():
                if key in self.FLAGS:
                    flags += " " + self.FLAGS[key] + str(value)
            flags = flags.lstrip()
            # NOTE(review): all flags end up space separated inside a single element of cmd — confirm
            # that commands receiving more than one flag cope with getting them as one argument.
            cmd[flags_index] = flags

        # Returning command
        command = {
            "args": args,
            "kwargs": kwargs,
            "cmd": cmd,
            "flags": flags
        }
        return self.validate_command(command, validate_input=False)

    def _validate_input(self, *args, **kwargs):
        """
        Validates arguments and keyword arguments against the "arguments" and "flags" schemas in ``SCHEMA``.
        A schema that is None forbids any input of that kind, an empty schema skips validation.

        :param args: the arguments to validate (validated as a list)
        :param kwargs: the keyword arguments to validate
        :raises ValidationError: when input is given where there should be none or when it violates the schema
        """
        args_schema = self.SCHEMA.get("arguments")
        kwargs_schema = self.SCHEMA.get("flags")
        if args_schema is None and len(args):
            raise ValidationError("Received arguments for command where there should be none.")
        if kwargs_schema is None and len(kwargs):
            raise ValidationError("Received keyword arguments for command where there should be none.")
        if args_schema:
            try:
                jsonschema.validate(list(args), args_schema)
            except SchemaValidationError as ex:
                raise ValidationError(
                    "{}: {}".format(self.__class__.__name__, str(ex))
                )
        if kwargs_schema:
            try:
                jsonschema.validate(kwargs, kwargs_schema)
            except SchemaValidationError as ex:
                raise ValidationError(
                    "{}: {}".format(self.__class__.__name__, str(ex))
                )

    def validate_command(self, command, validate_input=True):
        """
        Validates a dictionary that represents a command that the resource will run.
        It currently checks whether the current data (if any) is still valid or has expired.
        Apart from that it validates input which should adhere to
        the JSON schema defined in the ``SCHEMA`` attribute.

        :param command: (dict) the command dictionary
        :param validate_input: (bool) whether to validate input
        :return: (dict) the command that was passed in
        :raises ValidationError: when the resource has expired or the input is invalid
        """
        # NOTE(review): datetime.now() is a naive datetime, comparing it with
        # a timezone aware purge_at would raise a TypeError — confirm purge_at is naive.
        if self.purge_at is not None and self.purge_at <= datetime.now():
            raise ValidationError("Resource is no longer valid and will get purged")
        # Internal asserts about the request
        assert isinstance(command, dict), \
            "Command should be a dictionary."
        assert isinstance(command["cmd"], list), \
            "Cmd should be a list that can be passed on to subprocess.run"
        if validate_input:
            self._validate_input(
                *command.get("args", tuple()),
                **command.get("kwargs", {})
            )
        # All is fine :)
        return command

    def clean_stdout(self, stdout):
        """
        This method decodes the stdout from the subprocess result to UTF-8.
        Override this method to do any further cleanup.

        :param stdout: (bytes) stdout from the command
        :return: (str) cleaned decoded output
        """
        return stdout.decode("utf-8")

    def clean_stderr(self, stderr):
        """
        This method decodes the stderr from the subprocess result to UTF-8.
        Override this method to do any further cleanup.

        :param stderr: (bytes) stderr from the command
        :return: (str) cleaned decoded output
        """
        return stderr.decode("utf-8")

    #######################################################
    # PROTECTED METHODS
    #######################################################
    # Some internal methods to execute the shell commands
    # Currently it wraps subprocess

    def _run(self):
        """
        Does the actual command execution based on the computed link
        Will set storage fields to returned values

        The ``stdin`` field (if any) is written to the stdin of the command as UTF-8.
        """
        assert self.command and isinstance(self.command, dict), \
            "Trying to run command before having a valid command dictionary."

        cmd = self.command.get("cmd")
        cwd = None
        env = self.environment(*self.command.get("args"), **self.command.get("kwargs"))
        if self.DIRECTORY_SETTING:
            cwd = getattr(datagrowth_settings, self.DIRECTORY_SETTING)
        # Passing input makes subprocess create the stdin pipe itself, write the bytes and close it.
        # Without a stdin value the command sees an immediate end of file,
        # which is what a bare and unwritten stdin pipe amounts to as well.
        results = subprocess.run(
            cmd,
            input=(self.stdin or "").encode("utf-8"),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=cwd,
            env=env
        )
        self._update_from_results(results)

    def _update_from_results(self, results):
        """
        Copies the return code and the cleaned stdout and stderr from the results onto the resource.

        :param results: the object returned by ``subprocess.run``
        """
        self.status = results.returncode
        self.stdout = self.clean_stdout(results.stdout)
        self.stderr = self.clean_stderr(results.stderr)

    def handle_errors(self):
        """
        Raises exceptions upon error statuses
        Override this method to raise exceptions for your own error states.
        By default it raises the ``DGShellError`` whenever ``success`` is False.
        """
        if not self.success:
            class_name = self.__class__.__name__
            message = "{} > {} \n\n {}".format(class_name, self.status, self.stderr)
            raise DGShellError(message, resource=self)

    #######################################################
    # DJANGO MODEL
    #######################################################
    # Methods and properties to tweak Django

    def clean(self):
        """
        Fills ``uri`` from the command when it is still empty.
        """
        if self.command and not self.uri:
            self.uri = ShellResource.uri_from_cmd(self.command.get("cmd"))
        super().clean()

    #######################################################
    # CONVENIENCE
    #######################################################
    # Some static methods to provide standardization

    @staticmethod
    def uri_from_cmd(cmd):
        """
        Given a command list this method will sort that list, but keeps the first element as first element.
        That way a database lookup for a command will always return a command that logically match that command.
        Regardless of flag or argument order.
        At the same time similar commands will appear beneath each other in an overview.

        :param cmd: the command list as passed to subprocess.run to normalize to URI
        :return: a normalized URI suitable for lookups
        """
        cmd = copy(cmd)  # the list on the command itself should keep its order
        main = cmd.pop(0)
        cmd.sort()
        cmd.insert(0, main)
        return " ".join(cmd)

    class Meta:
        abstract = True
class Collective(Organism):  # TODO: rename to family
    """
    An ``Organism`` that holds a set of Individuals (through ``individual_set``)
    and optionally keeps indexes over the properties of these Individuals.
    """

    indexes = json_field.JSONField(null=True, blank=True, default={}, encoder=IndexEncoder, decoder=IndexDecoder)
    identifier = models.CharField(max_length=255, null=True, blank=True)

    @property
    def url(self):
        """
        The URL to the content of this Collective.

        :raises ValueError: when the Collective has not been saved yet
        """
        if not self.id:
            raise ValueError("Can't get url for unsaved Collective")
        return reverse("v1:collective-content", args=[self.id])  # TODO: make version aware

    @staticmethod
    def validate(data, schema):
        """
        Validates the data against given schema for one or more Individuals.
        A single dictionary or Individual counts as one instance, any other iterable
        is taken to be a sequence of instances.

        :param data: The data to validate
        :param schema: The JSON schema to use for validation.
        :return: None, validation errors come from ``Individual.validate``
        """
        # A dict is iterable as well, but iterating it would validate its keys instead of the data
        if isinstance(data, (dict, Individual)) or not isinstance(data, Iterable):
            data = [data]
        for instance in data:
            Individual.validate(instance, schema)

    def update(self, data, validate=True, reset=True, batch_size=500):  # TODO: rename to "add" and implement "update"
        """
        Update the instance with new data by adding to the Collective
        or by updating Individuals that are on the Collective.

        :param data: The data to use for the update
        :param validate: (optional) whether to validate data or not (yes by default)
        :param reset: (optional) whether to delete all Individuals of this Collective first (yes by default)
        :param batch_size: (optional) the amount of elements from data that get prepared and inserted together
        :return: (int) the amount of Individuals that got created
        """
        assert isinstance(data, (Iterator, list, tuple, dict, Individual)), \
            "Collective.update expects data to be formatted as iteratable, dict or Individual not {}".format(type(data))

        # A single dict or Individual is one update. Batching it directly
        # would iterate over its contents instead.
        if isinstance(data, (dict, Individual)):
            data = [data]

        if reset:
            self.individual_set.all().delete()

        def prepare_updates(data):
            """Turns (possibly nested) data into a flat list of unsaved Individuals for this Collective."""
            prepared = []
            if isinstance(data, dict):
                if validate:
                    Individual.validate(data, self.schema)
                individual = Individual(
                    community=self.community,
                    collective=self,
                    schema=self.schema,
                    properties=data
                )
                individual.clean()
                prepared.append(individual)
            elif isinstance(data, Individual):
                if validate:
                    Individual.validate(data, self.schema)
                # Clearing the id makes bulk_create insert this Individual as a new row
                data.id = None
                data.collective = self
                data.clean()
                prepared.append(data)
            else:  # type is list
                for instance in data:
                    prepared += prepare_updates(instance)
            return prepared

        update_count = 0
        for updates in ibatch(data, batch_size=batch_size):
            updates = prepare_updates(updates)
            update_count += len(updates)
            Individual.objects.bulk_create(updates, batch_size=settings.MAX_BATCH_SIZE)

        return update_count

    @property
    def content(self):
        """
        Returns the content of the members of this Collective

        :return: a generator yielding properties from Individual members
        """
        return (ind.content for ind in self.individual_set.iterator())

    @property
    def has_content(self):
        """
        Indicates if Collective entails Individuals or not

        :return: True if there are Individuals, False otherwise
        """
        return self.individual_set.exists()

    @property
    def json_content(self):
        """
        Joins the ``json_content`` of all Individuals into one JSON array.

        :return: (str) a JSON array
        """
        json_content = [ind.json_content for ind in self.individual_set.all()]
        return "[{}]".format(",".join(json_content))

    def output(self, *args):
        """
        Gathers the output of all Individuals for the given formats.

        * Multiple arguments result in a list with one output per argument
        * A falsy argument results in a list holding that argument once for every Individual
        * A list argument results in one list per Individual holding a value for every element
        * Any other argument is passed to ``Individual.output`` for every Individual

        :param args: the formats to output
        :return: (list) the output as described above
        """
        if len(args) > 1:
            # A list instead of map, to return the same type under every Python version
            return [self.output(arg) for arg in args]
        frm = args[0]
        if not frm:
            return [frm for ind in range(0, self.individual_set.count())]
        elif isinstance(frm, list):
            output = self.output(*frm)
            if len(frm) > 1:
                # From one list per format to one list per Individual
                output = [list(zipped) for zipped in zip(*output)]
            else:
                output = [[out] for out in output]
            return output
        else:
            return [ind.output(frm) for ind in self.individual_set.iterator()]

    def group_by(self, key):
        """
        Outputs a dict with lists. The lists are filled with Individuals that hold the same value for key.

        :param key: the key in the properties of Individuals to group by
        :return: (dict) lists of Individuals under the value they share for key
        """
        grouped = {}
        for ind in self.individual_set.all():
            assert key in ind.properties, \
                "Can't group by {}, because it is missing from an individual on collective {}".format(key, self.id)
            grouped.setdefault(ind.properties[key], []).append(ind)
        return grouped

    def _get_index_keys(self):
        """
        Returns the keys that the first index of ``indexes`` consists of.
        Expects ``indexes`` to hold at least one index.
        """
        return [item[0] for item in next(six.iterkeys(self.indexes))]

    def build_index(self, keys):
        """
        Sets the index on all Individuals of this Collective based on keys
        and stores the Individuals as well as the Collective.

        :param keys: (list) the keys of the Individuals to build the index with
        :return:
        """
        assert isinstance(keys, list) and len(keys), \
            "Expected a list with at least one element for argument keys."

        individuals = []
        for ind in self.individual_set.all():
            self.set_index_for_individual(ind, keys)
            individuals.append(ind)
        # With default arguments update deletes the stored Individuals and inserts the given ones
        self.update(individuals)
        self.save()

    def set_index_for_individual(self, individual, index_keys):
        """
        Looks up the index code for the values that the individual holds under index_keys
        and sets it on the individual. An index that is not known yet gets registered with a new code.

        :param individual: the individual to set the index on
        :param index_keys: the keys to read from the individual
        :return: the individual
        """
        index = tuple([(key, individual[key]) for key in index_keys])
        if index not in self.indexes:
            # Codes are handed out sequentially
            index_code = len(self.indexes)
            self.indexes[index] = index_code
        individual.index = self.indexes[index]
        return individual

    def influence(self, individual):
        """
        This allows the Collective to set some attributes and or properties on the Individual

        :param individual: The individual that should be influenced
        :return: The influenced individual
        """
        if self.identifier:
            # presumably reach resolves the path inside properties — verify against its definition
            individual.identity = reach("$." + self.identifier, individual.properties)
        if self.indexes:
            index_keys = self._get_index_keys()
            individual = self.set_index_for_individual(individual, index_keys)

        return individual

    def select(self, **kwargs):
        """
        Filters the Individuals of this Collective through ``indexes``.

        :param kwargs: key/value pairs, an index matches when it contains at least one of these pairs
        :return: a queryset with the Individuals that have a matching index
        """
        select = set()
        for item in six.iteritems(kwargs):
            for index in self.indexes.keys():
                if item in index:
                    select.add(self.indexes[index])
        return self.individual_set.filter(index__in=select)