class TranspylerKernel(IPythonKernel):
    """
    A meta-kernel backend for using transpyled languages in Jupyter/IPython.
    """

    transpyler = None

    implementation = lazy(lambda self: 'i' + self.transpyler.name)
    implementation_version = lazy(lambda self: self.transpyler.version)
    language = lazy(lambda self: self.transpyler.name)
    language_version = lazy(lambda self: self.transpyler.language_version)
    banner = lazy(lambda self: self.transpyler.console_banner())
    language_info = lazy(lambda self: self.transpyler.info.get_language_info())
    shell_class = Type(TranspylerShell)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self.transpyler is None:
            raise ValueError('transpyler was not defined')
        monkey_patch(self.transpyler)
        self.transpyler.init()

    def do_execute(self, code, *args, **kwargs):
        code = self.transpyler.transpile(code)
        return super().do_execute(code, *args, **kwargs)

    def do_is_complete(self, code):
        return super().do_is_complete(self.transpyler.transpile(code))
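
# Hypothetical usage sketch (not from the original source): a concrete
# kernel only needs to point `transpyler` at a transpyler instance, e.g.
# for the PyBr language used elsewhere in these snippets:
#
#     class PyBrKernel(TranspylerKernel):
#         transpyler = PyBr()
#
# Jupyter could then launch it through the usual kernel machinery, e.g.
# IPKernelApp.launch_instance(kernel_class=PyBrKernel).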
class Classifier:
    """
    Abstract classifier interface.
    """

    _label_set = lazy(lambda x: sorted(set(x.labels)))
    _labels_map = lazy(lambda x: {x: i for i, x in enumerate(x._label_set)})

    @lazy
    def _labels_encoder(self):
        sorted_items = sorted(self._labels_map.items(), key=lambda _: _[1])
        return [k for k, i in sorted_items]

    @lazy
    def _integer_labels(self):
        map = self._labels_map
        return np.array([map[label] for label in self.labels])

    def __init__(self, training_data, transformer, labels):
        self.training_data = np.asarray(training_data)
        self.labels = np.asarray(labels)
        self._transformer = transformer

    def __call__(self, pop):
        """
        self(pop) <==> self.classify(pop)
        """
        return self.classify(pop)

    def transform(self, pop):
        """
        Transform an individual or population to a raw np.ndarray data set.
        """
        if getattr(pop, 'is_individual', False):
            pop = population.Population([pop.data])
        return self._transformer(pop)

    def classify(self, pop):
        """
        Classify a population object.
        """
        is_individual = getattr(pop, 'is_individual', False)
        data = self.transform(pop)
        result = self.classify_data(data)
        return result[0] if is_individual else result

    def classify_data(self, data):
        """
        Classify raw data, returning a list of labels.
        """
        raise NotImplementedError

    def prob_matrix(self, pop):
        """
        Return a matrix with the probability that each individual is
        classified with each label. Individuals are represented in the rows
        and labels in the columns.

        Label indexes are assigned by ordering: e.g., if the original labels
        contain 'foo', 'bar' and 'baz', 'bar' is assigned column index 0
        (because it comes first in alphabetical order), 'baz' the second
        column and 'foo' the third.
        """
        logp = self.log_prob_matrix(pop)
        logp -= logp.max(axis=1, keepdims=True)  # log-sum-exp shift per row
        probs = np.exp(logp)
        probs /= probs.sum(axis=1, keepdims=True)  # each row sums to 1
        return probs

    def prob_list(self, pop):
        """
        Return a list of :cls:`kpop.Prob` objects with the probabilities
        assigned to each label classification.
        """
        values = self._labels_encoder
        return [Prob(zip(values, row)) for row in self.prob_matrix(pop)]

    def prob_table(self, pop):
        """
        Return a pandas dataframe with the probabilities that each individual
        belongs to each label.
        """
        from pandas import DataFrame

        data = self.prob_matrix(pop)
        return DataFrame(data, columns=self._labels_encoder)

    def log_prob_matrix(self, pop):
        """
        Like :meth:`prob_matrix`, but returns the log probabilities.
        """
        if type(self).prob_matrix is not Classifier.prob_matrix:
            return np.log(self.prob_matrix(pop))
        raise NotImplementedError(
            "either 'log_prob_matrix' or 'prob_matrix' must be defined")

    def log_prob_list(self, pop):
        """
        Like :meth:`prob_list`, but returns the log probabilities.
        """
        values = self._labels_encoder
        return [
            Prob(zip(values, row), normalize=False)
            for row in self.log_prob_matrix(pop)
        ]

    def log_prob_table(self, pop):
        """
        Like :meth:`prob_table`, but returns the log probabilities.
        """
        from pandas import DataFrame

        data = self.log_prob_matrix(pop)
        return DataFrame(data, columns=self._labels_encoder)
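
# Hypothetical sketch of a minimal concrete classifier (not from the
# original source). Only classify_data() is required; the probability
# helpers additionally need prob_matrix() or log_prob_matrix().
class ConstantClassifier(Classifier):
    """Always predicts the most frequent training label (illustrative)."""

    def classify_data(self, data):
        labels = list(self.labels)
        most_common = max(set(labels), key=labels.count)
        return [most_common] * len(data)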
class PopulationBase(collections.Sequence, metaclass=abc.ABCMeta):
    """
    Base class for Population and MultiPopulation.

    Attrs:
        freqs:
            A list of :cls:`kpop.Prob` objects representing the probabilities
            of each loci.
        freqs_matrix:
            A full matrix with the shape (num individuals, max num of
            alleles) with the probability for each allele.
        freqs_vector:
            Frequencies for allele 1. This is more useful for biallelic data,
            since the frequency of the second allele is simply the
            complement.
        hfreqs_vector:
            Vector of frequencies of heterozygotes.
    """

    # General shape
    size = property(len)
    num_loci = lazy(lambda _: _[0].num_loci)
    ploidy = lazy(lambda _: _[0].ploidy)
    shape = property(lambda _: (_.size, _.num_loci, _.ploidy))
    data_size = fn_property(_.size * _.num_loci * _.ploidy)
    dtype = np.dtype('uint8')
    _shape_attrs = (
        'size', 'num_loci', 'ploidy', 'shape', 'data_size',
    )

    # Frequencies
    freqs = property(get_freqs, set_freqs)
    freqs_matrix = lazy(lambda _: freqs_to_matrix(_.freqs))
    freqs_vector = lazy(lambda _: np.ascontiguousarray(_.freqs_matrix[:, 0]))
    hfreqs_vector = lazy(hfreqs_vector)

    # Allele statistics
    allele_names = None
    is_biallelic = fn_lazy(_.num_alleles == 2)
    num_alleles = lazy(lambda _: max(max(freq) for freq in _.freqs))

    # Multi population
    is_multi_population = False
    num_populations = fn_property(lambda _: len(_.populations))

    # Missing data
    has_missing_data = property(lambda _: any(ind.has_missing for ind in _))
    missing_data_total = property(
        lambda _: sum(ind.missing_total for ind in _))
    missing_data_ratio = fn_property(_.missing_data_total / _.data_size)

    # Meta information
    individual_ids = property(lambda _: _.meta['ids'])

    # Special attributes. These will be inserted later via monkey patching
    populations = ()
    admixture = Admixture()
    clusterization = Clusterization()
    classification = Classification()
    io = Io()
    plot = Plot()
    projection = Projection()
    simulation = Simulation()
    statistics = Statistics()

    # Aliases
    admix = property(lambda self: self.admixture)
    cls = property(lambda self: self.classification)
    cluster = property(lambda self: self.clusterization)
    proj = property(lambda self: self.projection)
    sim = property(lambda self: self.simulation)
    stats = property(lambda self: self.statistics)

    # List of cacheable attributes
    _cacheable_attributes = (
        'has_missing', 'missing_total', 'missing_ratio',
        'is_biallelic', 'num_alleles',
        'admixture', 'clustering', 'classification', 'io', 'plot',
        'projection', 'simulation', 'statistics',
    )

    def __init__(self, freqs=None, allele_names=None, id=None, ploidy=None,
                 num_loci=None, num_alleles=None, individual_ids=None):
        # Normalize frequencies
        if freqs is None:
            self._freqs = None
        elif len(freqs) == 0:
            raise ValueError('cannot initialize from empty frequencies')
        elif isinstance(freqs[0], collections.Mapping):
            self._freqs = [Prob(p) for p in freqs]
        else:
            freqs = np.asarray(freqs)

            if freqs.ndim == 2:
                self._freqs = [Prob(dict(enumerate(p, 1))) for p in freqs]
                self.freqs_matrix = np.array(freqs)
                self.num_alleles = freqs.shape[1]
            elif freqs.ndim == 1:
                self._freqs = [Prob({1: p, 2: 1 - p}) for p in freqs]
                self.freqs_vector = np.array(freqs)
                self.freqs_matrix = fill_freqs_vector(self.freqs_vector)
                self.num_alleles = 2
            else:
                raise ValueError('invalid frequency data')

        # Fix num_loci from data
        if self._freqs is not None:
            self.num_loci = len(self._freqs)
            if num_loci is not None and num_loci != self.num_loci:
                raise ValueError('invalid value for num_loci')
        elif num_loci is not None:
            self.num_loci = num_loci

        # Individual ids
        if individual_ids is None:
            fmt = 'ind%s' if id is None else '%s%%s' % id
            individual_ids = [fmt % i for i in range(1, self.size + 1)]

        # Save required attributes
        self.allele_names = allele_names
        self.id = id
        self._last_id_index = 0
        self.meta = pd.DataFrame({'ids': individual_ids})

        # Save optional given lazy attributes
        if ploidy is not None:
            self.ploidy = ploidy
        if num_alleles is not None:
            self.num_alleles = num_alleles

    def __repr__(self):
        return self.io.render(id_align='best', limit=20, ind_limit=10)

    def __str__(self):
        return self.io.render(id_align='best')

    def __eq__(self, other):
        if not isinstance(other, PopulationBase):
            return NotImplemented
        if self.shape != other.shape:
            return False
        return all(x == y for x, y in zip(self, other))

    def _population(self, *args, **kwargs):
        from kpop import Population
        return Population(*args, **kwargs)

    def _next_id(self):
        self._last_id_index += 1
        return '%s%s' % (self.id or 'ind', self._last_id_index)

    def _clear_caches(self):
        discard_attrs(self, self._cacheable_attributes)

    def _as_array(self):
        raise NotImplementedError('must be implemented on subclasses')

    def as_array(self, which='raw'):
        """
        Convert to a numpy data array using the requested conversion method.
        This is a basic pre-processing step in many dimensionality reduction
        algorithms.

        Genotypes are categorical data and it usually doesn't make sense to
        treat the integer encoding used in kpop as ordinal data (there is no
        ordering implied when treating, say, allele 1 vs allele 2 vs
        allele 3).

        Conversion methods:
            * raw:
                A 3-dimensional array of (size, num_loci, ploidy) for raw
                genotype data. Each component represents the value of a
                single allele.
            * flat:
                Like raw, but flattens the last dimension into a
                (size, num_loci * ploidy) array. This creates a new feature
                per locus for each degree of ploidy in the data.
            * rflat:
                Flattened data, but the positions of the alleles at each
                locus are shuffled first. This is recommended if data does
                not carry reliable haplotype information.
            * raw-unity, flat-unity, rflat-unity:
                Normalized versions of the "raw", "flat", and "rflat"
                methods. All components are rescaled to zero mean and unity
                variance.
            * count:
                Forces conversion to biallelic data and counts the number of
                occurrences of the first allele. Most methods will require
                normalization, so you should probably consider a specific
                method such as count-unity, count-snp, etc.
            * count-unity:
                Normalized version of count, scaled to zero mean and unity
                variance.
            * count-snp:
                Normalizes each feature using the standard deviation expected
                under the assumption of Hardy-Weinberg equilibrium. This
                procedure is described in Patterson et al., "Population
                Structure and Eigenanalysis", and is recommended for SNPs
                subject to genetic drift.
            * count-center:
                Instead of normalizing, simply center the data by subtracting
                half the ploidy to place it into a symmetric range. This
                normalization puts data into a cube with a predictable origin
                and range. For diploid data, the components will be either
                -1, 0, or 1.

        Returns:
            An ndarray with transformed data.
        """
        data = self._as_array()

        # Raw conversion
        if which == 'raw':
            return data
        elif which == 'raw-unity':
            data = data - data.mean(axis=0)
            std = data.std(axis=0)
            data /= np.where(std, std, 1)
            return data

        # Flattened representations
        elif which in {'flat', 'flat-unity'}:
            data = data.reshape(self.size, self.num_loci * self.ploidy)
            if which == 'flat-unity':
                return preprocessing.scale(data.astype(float))
            return data
        elif which in {'rflat', 'rflat-unity'}:
            return self.shuffle_loci().as_array(which[1:])

        # Counters
        elif which in {'count', 'count-unity', 'count-snp', 'count-center'}:
            count = (np.array(data) == 1).sum(axis=2)
            if which == 'count-unity':
                return preprocessing.scale(count.astype(float))
            elif which == 'count-snp':
                mu = count.mean(axis=0)
                p = mu / self.ploidy
                norm = np.sqrt(p * (1 - p))
                norm = np.where(norm, norm, 1)
                return (count - mu) / norm
            elif which == 'count-center':
                if self.ploidy % 2:
                    return count - self.ploidy / 2
                else:
                    return count - self.ploidy // 2
            else:
                return count

        raise ValueError('invalid conversion method: %r' % which)

    def drop_non_biallelic(self, **kwargs):
        """
        Creates a new population with all non-biallelic loci removed.

        Returns:
            A (population, removed) tuple with the new population and a list
            of all dropped locus indexes.
        """
        bad_loci = self.statistics.non_biallelic()
        return self.drop_loci(bad_loci, **kwargs), bad_loci

    def force_biallelic(self, **kwargs):
        """
        Return a new population with forced biallelic data.

        If a locus has more than 2 alleles, the most common allele is picked
        as allele 1 and the alternate allele 2 comprises all the others.
        """
        alleles_mapping = [biallelic_mapping(prob) for prob in self.freqs]
        return self.map_alleles(alleles_mapping, **kwargs)

    def sort_by_allele_freq(self, **kwargs):
        """
        Return a new population in which the index attributed to each allele
        in each locus is sorted by its frequency in the population. After
        that, allele 1 will be the most common, allele 2 the second most
        common, and so on.
        """
        alleles_mapping = [sorted_allele_mapping(prob) for prob in self.freqs]
        return self.map_alleles(alleles_mapping, **kwargs)

    @abc.abstractmethod
    def map_alleles(self, alleles_mapping, **kwargs):
        """
        Create a new population reorganizing all allele values by the given
        list of allele value mappings.

        Args:
            alleles_mapping:
                A list with num_loci elements. Each element must be a mapping
                from the old allele values to the new ones. If an element is
                an empty dictionary, no remapping is done.
        """
        raise NotImplementedError('must be implemented in subclasses')

    def drop_loci(self, indexes, **kwargs):
        """
        Create a new population with all loci in the given indexes removed.
        """
        indexes = set(indexes)
        keep = np.array([i for i in range(self.num_loci) if i not in indexes])
        return self.keep_loci(keep, **kwargs)

    def drop_individuals(self, indexes, **kwargs):
        """
        Creates a new population removing the individuals in the given
        indexes.
        """
        indexes = set(indexes)
        keep = np.array([i for i in range(self.size) if i not in indexes])
        return self.keep_individuals(keep, **kwargs)

    @abc.abstractmethod
    def keep_loci(self, indexes, **kwargs):
        """
        Creates a new population keeping only the loci in the given indexes.
        """
        raise NotImplementedError('must be implemented in subclasses')

    @abc.abstractmethod
    def keep_individuals(self, indexes, **kwargs):
        """
        Creates a new population keeping only the individuals in the given
        indexes.
        """
        raise NotImplementedError('must be implemented in subclasses')

    def shuffle_loci(self, **kwargs):
        """
        Return a copy with the contents of each locus shuffled.
        """
        pop = self.copy(**kwargs)
        for ind in pop:
            for loci in ind.data:
                np.random.shuffle(loci)
        return pop

    def copy(self, id=None):
        """
        Return a copy of the population.
        """
        new = copy.copy(self)
        new.populations = copy.copy(self.populations)
        new._clear_caches()
        if id is not None:
            new.id = id
        return new
class Transpyler(metaclass=SingletonMeta):
    """
    Base class for all new Transpylers.

    A transpyler is a singleton object.

    Very simple Python variations can be created by subclassing
    Transpyler::

        class PyBr(Transpyler):
            translations = {
                'para': 'for',            # single token translations
                'em': 'in',
                ('para', 'cada'): 'for',  # token sequence translations
                ('faça', ':'): ':',
            }

    Now we can create an object with exec(), eval() and compile() functions
    that handle the newly defined transpyler::

        pybr = PyBr()
        global_ns = {}

        pybr.exec('''
        x, y = 1, 1
        para cada i em [1, 2, 3, 4, 5] faça:
            x, y = y, x + y
        ''', global_ns)

        assert global_ns['x'] == 8
        assert global_ns['y'] == 13
    """

    # Cache builtins
    _compile = _compile
    _exec = _exec
    _eval = _eval
    _input = _input
    _print = _print

    # Factories and subclasses
    lexer_factory = Lexer
    info_factory = Info
    introspection_factory = Introspection
    namespace_factory = Namespace

    # Constants
    lang = 'en'
    has_turtle_functions = False
    turtle_backend = None
    standard_lib = None
    translations = None
    invalid_tokens = None
    language_version = '0.1.0'
    version = '0.1.0'
    codemirror_mode = 'python'
    file_extension = 'py'

    # Language info and introspection
    introspection = lazy(lambda self: self.introspection_factory(self))
    info = lazy(lambda self: self.info_factory(self))
    mimetypes = lazy(lambda self: [self.mimetype])
    mimetype = lazy(lambda self: 'text/x-%s' % self.name)
    link_docs = lazy(
        lambda self: "http://%s.readthedocs.io/%s/latest/"
                     % (self.name, self.lang))
    link_github = lazy(
        lambda self: "http://github.com/transpyler/%s/" % self.name)
    translate = lazy(lambda self: translator_factory(self.lang))

    # Display messages
    display_name = lazy(lambda self: self.name.title().replace('_', ' '))
    short_banner = lazy(lambda self: self.translate(
        '%s %s\n'
        'Type "help", "copyright" or "license" for more information.'
        % (self.display_name, self.version)))
    long_banner = lazy(lambda self: self.short_banner)
    use_short_banner = True

    # Lexer
    lexer = lazy(lambda self: self.lexer_factory(self))

    @lazy
    def name(self):
        cls_name = self.__class__.__name__.lower()
        if cls_name == 'transpyler':
            return 'transpyler'
        elif cls_name.endswith('transpyler'):
            return cls_name[:-10]
        else:
            return cls_name

    @lazy
    def namespace(self):
        return self.recreate_namespace()

    def __init__(self, **kwargs):
        self._forbidden = False
        for k, v in kwargs.items():
            setattr(self, k, v)
        self._has_init = False
        assert self.name, 'Name cannot be empty'

    def __repr__(self):
        return '<%s: %r>' % (self.__class__.__name__, self.name)

    #
    # System functions
    #
    def init(self, ns=None):
        """
        Initializes the transpyler runtime.

        Args:
            ns (dict):
                A dictionary with extra functions to be added to the globals
                namespace at runtime.
        """
        self.apply_curses()
        self.namespace.update(ns or {})

    def apply_curses(self):
        """
        Apply any required curses.

        Default implementation does nothing.
        """

    def compile(self, source, filename, mode, flags=0, dont_inherit=False,
                compile_function=None):
        """
        Similar to the built-in function compile() for transpyled code.

        The additional compile_function() argument allows one to define a
        function to replace Python's builtin compile().

        Args:
            source (str or code):
                Code to be executed.
            filename:
                File name associated with code. Use '<input>' for strings.
            mode:
                One of 'exec' or 'eval'. The second allows only simple
                statements that generate a value and is used by the eval()
                function.
            compile_function (callable):
                A possible replacement for Python's built-in compile().
        """
        compile_function = compile_function or _compile
        source = self.transpile(source)
        return compile_function(source, filename, mode, flags, dont_inherit)

    def exec(self, source, globals=None, locals=None, exec_function=None):
        """
        Similar to the built-in function exec() for transpyled code.

        The additional exec_function() argument allows one to define a
        function to replace Python's builtin exec().

        Args:
            source (str or code):
                Code to be executed.
            globals, locals:
                A globals/locals dictionary.
            exec_function (callable):
                A possible replacement for Python's built-in exec().
        """
        exec_function = exec_function or _exec
        code = self.transpile(source) if isinstance(source, str) else source
        globals = {} if globals is None else globals
        globals.update(self.namespace)
        args = (globals,) if locals is None else (globals, locals)
        return exec_function(code, *args)

    def eval(self, source, globals=None, locals=None, eval_function=None):
        """
        Similar to the built-in function eval() for transpyled code.

        The additional eval_function() argument allows one to define a
        function to replace Python's builtin eval().

        Args:
            source (str or code):
                Code to be executed.
            globals, locals:
                A globals/locals dictionary.
            eval_function (callable):
                A possible replacement for Python's built-in eval().
        """
        eval_function = eval_function or _eval
        code = self.transpile(source) if isinstance(source, str) else source
        globals = {} if globals is None else globals
        globals.update(self.namespace)
        args = (globals,) if locals is None else (globals, locals)
        return eval_function(code, *args)

    def transpile(self, src):
        """
        Convert source to Python.
        """
        return self.lexer.transpile(src)

    def is_incomplete_source(self, src, filename="<input>", symbol="single"):
        """
        Test if a given source code is incomplete.

        Incomplete code may appear in user interactions when the user is
        typing a multi-line command:

            for x in range(10):
                ... should continue here, but the user already pressed enter!
        """
        try:
            pytuga_src = self.transpile(src)
        except SyntaxError:
            return True
        return codeop.compile_command(pytuga_src, filename, symbol) is None

    @classmethod  # noqa: C901 (it only creates functions on a closure)
    def core_functions(cls):
        """
        Return a dictionary with a small namespace for the core functions in
        the transpyler API:

        * init: init runtime
        * compile: compile a string of source code
        * exec: execute a string of source code
        * eval: evaluate a string of source code and return the resulting
          object
        * transpile: transpile source code to Python
        * namespace: return a dictionary with builtin functions
        * is_incomplete_source: check if a string can be executed as-is or if
          it requires additional lines of code in order to execute.
        """
        def init(ns=None):
            return cls().init(ns)

        def compile(source, filename, mode, flags=0, dont_inherit=False,
                    compile_function=None):
            return cls().compile(source, filename, mode, flags=flags,
                                 dont_inherit=dont_inherit,
                                 compile_function=compile_function)

        def exec(source, globals=None, locals=None, exec_function=None):
            return cls().exec(
                source,
                globals=globals,
                locals=locals,
                exec_function=exec_function,
            )

        def eval(source, globals=None, locals=None, eval_function=None):
            return cls().eval(
                source,
                globals=globals,
                locals=locals,
                eval_function=eval_function,
            )

        def transpile(src):
            return cls().transpile(src)

        def is_incomplete_source(src, filename="<input>", symbol="single"):
            return cls().is_incomplete_source(src, filename, symbol)

        def namespace(turtle=None):
            """
            Return a dictionary with all public functions.

            If turtle is given and is either 'qt' or 'tk', the corresponding
            turtle functions are included in the namespace.
            """
            transpyler = cls()
            transpyler.has_turtle_functions = turtle is not None
            transpyler.turtle_backend = turtle
            transpyler.init()
            return transpyler.namespace

        # Update docstrings
        init.__doc__ = cls.init.__doc__
        compile.__doc__ = cls.compile.__doc__
        exec.__doc__ = cls.exec.__doc__
        eval.__doc__ = cls.eval.__doc__
        transpile.__doc__ = cls.transpile.__doc__
        is_incomplete_source.__doc__ = cls.is_incomplete_source.__doc__

        return dict(
            init=init,
            compile=compile,
            exec=exec,
            eval=eval,
            transpile=transpile,
            is_incomplete_source=is_incomplete_source,
            namespace=namespace,
        )

    #
    # Console helpers
    #
    def console_banner(self, short=None):
        """
        Return a string with the console banner.
        """
        if short is None:
            short = self.use_short_banner
        if short:
            return self.short_banner
        return getattr(self, 'banner', self.short_banner)

    def recreate_namespace(self):
        """
        Recompute the default namespace for the transpyler object.
        """
        ns = self.namespace_factory(self)
        self.namespace = dict(ns)
        return self.namespace

    #
    # External execution
    #
    def start_console(self, console='auto'):
        """
        Starts a regular python console with the current transpyler.

        Args:
            console:
                Can be one of 'jupyter', 'console', 'qtconsole', 'auto'. This
                chooses the default console application. The default behavior
                (auto) is to try jupyter and fall back to console if it is
                not available.
        """
        # Select the console application
        if console == 'auto':
            try:
                import IPython  # noqa: F401
            except ImportError:
                console = 'console'
            else:
                console = 'jupyter'

        if console == 'qtconsole':
            from .jupyter import start_jupyter
            start_jupyter(transpyler=self, gui=True)
        elif console == 'jupyter':
            from .jupyter import start_jupyter
            start_jupyter(transpyler=self, gui=False)
        elif console == 'console':
            from .console import start_console
            start_console(transpyler=self)
        else:
            raise ValueError('invalid console: %r' % console)

    def start_notebook(self):
        """
        Starts a jupyter notebook with the current transpyler.
        """
        from .jupyter import start_notebook
        start_notebook(self)

    def start_qturtle(self):
        """
        Starts a QTurtle application with the current transpyler.
        """
        if not has_qt():
            raise SystemExit('PyQt5 is necessary to run the turtle '
                             'application.')
        from qturtle.mainwindow import start_application
        start_application(self)

    def start_main(self):
        """
        Starts the default main application.
        """
        import click

        @click.command()
        @click.option('--cli', '-c', is_flag=True, default=False,
                      help='start gui-less console.')
        @click.option('--console', is_flag=True, default=False,
                      help='start a simple gui-less console.')
        @click.option('--notebook/--no-notebook', '-n', default=False,
                      help='starts notebook server.')
        def main(cli, notebook, console):
            if cli:
                return self.start_console('auto')
            if console:
                return self.start_console('console')
            if notebook:
                return self.start_notebook()
            if has_qt():
                return self.start_qturtle()
            else:
                msg = 'Could not start GUI. Do you have Qt installed?'
                click.echo(msg, err=True)
                return self.start_console('jupyter')

        return main()

    #
    # Callbacks: these methods are designed to be overridden by instances
    #
    def exit_callback(self):
        print('bye!')
        raise SystemExit(0)
class A:
    x = lazy(lambda self: 42)
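
# Quick check of the lazy descriptor: the lambda runs on first attribute
# access and the result is cached on the instance afterwards (the behavior
# the snippets above rely on).
a = A()
assert a.x == 42              # computed here
assert a.__dict__['x'] == 42  # cached; later reads skip the lambda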
from lazyutils import lazy
from sidekick import fn

fn_property = lambda x: property(fn(x)._)  # noqa: E731
fn_lazy = lambda x: lazy(fn(x)._)  # noqa: E731
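
# Hedged sketch of fn_property, mirroring how the kpop classes above use it
# (e.g. data_size = fn_property(_.size * _.num_loci * _.ploidy)). Assumes
# sidekick's `_` placeholder builds a deferred expression:
from sidekick import _

class Rect:
    def __init__(self, width, height):
        self.width = width
        self.height = height

    # read-only property computed from the fn expression
    area = fn_property(_.width * _.height)

assert Rect(3, 4).area == 12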
class Feedback(HasProgressMixin, models.TimeStampedModel,
               models.PolymorphicModel):
    """
    Feedback for user.

    Usually there will be one feedback per submission, but this figure may
    vary from case to case.
    """

    TITLE_OK = _('Correct answer!')
    TITLE_PARTIAL = _('Partially correct.')
    TITLE_WRONG = _('Wrong answer.')
    TITLE_NOT_GRADED = _('Not graded.')

    MESSAGE_OK = _('*Congratulations!* Your response is correct!')
    MESSAGE_OK_WITH_PENALTIES = _(
        'Your response is correct, but you did not achieve the maximum '
        'grade.')
    MESSAGE_WRONG = _('I\'m sorry. Wrong response!')
    MESSAGE_PARTIAL = _(
        'Your answer is partially correct: you achieved %(grade)d%% of '
        'the total grade.')
    MESSAGE_NOT_GRADED = _('Your response has not been graded yet!')

    submission = models.OneToOneField('Submission', related_name='feedback')
    manual_grading = models.BooleanField(
        default=True,
        help_text=_('True if feedback was created manually by a human.'))
    grader_user = models.ForeignKey(
        models.User, blank=True, null=True,
        help_text=_('User that performed the manual grading.'))
    given_grade_pc = models.DecimalField(
        _('percentage of maximum grade'),
        help_text=_(
            'This grade is given by the auto-grader and represents the grade '
            'for the response before accounting for any bonuses or '
            'penalties.'),
        max_digits=6,
        decimal_places=3,
        validators=[grade_validator],
        blank=True,
        null=True,
    )
    final_grade_pc = models.DecimalField(
        _('final grade'),
        help_text=_(
            'Similar to given_grade, but can account for additional factors '
            'such as delay penalties or for any other reason the teacher may '
            'want to override the student\'s grade.'),
        max_digits=6,
        decimal_places=3,
        validators=[grade_validator],
        blank=True,
        null=True,
    )
    is_correct = models.BooleanField(default=False)
    progress = lazy(lambda x: x.submission.progress)

    def get_feedback_title(self):
        """
        Return a title summarizing the feedback result.

        The default set of titles comes from the list:

        * Correct answer!
        * Partially correct.
        * Wrong answer.
        * Not graded.

        Different question types may define additional values for this list.
        """
        grade = self.given_grade_pc

        if grade == 100:
            return self.TITLE_OK
        elif grade is not None and grade > 0:
            return self.TITLE_PARTIAL
        elif grade == 0:
            return self.TITLE_WRONG
        else:
            return self.TITLE_NOT_GRADED

    def update_autograde(self):
        """
        Compute and set self.given_grade.

        This function may change other states in the feedback object,
        depending on the activity.
        """
        activity = self.activity
        submission = self.submission
        self.given_grade_pc = self.get_given_autograde(submission, activity)

    def get_given_autograde(self, submission, activity):
        """
        Atomic and testable version of update_autograde().

        Subclasses should override this method.

        Args:
            submission: a submission object
            activity: the activity the submission refers to

        Returns:
            A numeric value between 0 and 100 with the assigned grade.
        """
        name = self.__class__.__name__
        raise ImproperlyConfigured(
            'Class %s must implement the .get_given_autograde() method.'
            % name)

    def update_final_grade(self):
        """
        Compute the final grade applying all possible penalties and bonuses.
        """
        self.final_grade_pc = self.given_grade_pc
        if self.given_grade_pc == 100:
            self.is_correct = True

    def render_message(self, **kwargs):
        """
        Renders the feedback message.
        """
        if self.is_correct and self.final_grade_pc >= self.given_grade_pc:
            msg = self.MESSAGE_OK
        elif self.is_correct and self.final_grade_pc < self.given_grade_pc:
            msg = self.MESSAGE_OK_WITH_PENALTIES
        elif not self.is_correct and self.given_grade_pc > 0:
            msg = self.MESSAGE_PARTIAL
        else:
            msg = self.MESSAGE_WRONG
        return p(msg, cls='cs-feedback-message').render(**kwargs)
import importlib

from lazyutils import lazy
from sidekick import fn

fn_property = lambda x: property(fn(x)._)
fn_lazy = lambda x: lazy(fn(x)._)


class LazyModule:
    """
    A lazy module object.
    """

    def __init__(self, name):
        self.__path = name
        self.__mod = None

    def __load(self):
        self.__mod = importlib.import_module(self.__path)

    def __getattr__(self, item):
        if self.__mod is None:
            self.__load()
        value = getattr(self.__mod, item)
        setattr(self, item, value)
        return value


def lazy_module(mod):
    """
    Load a lazy module.
    """
    return LazyModule(mod)
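
# Usage sketch: the target module is only imported on first attribute
# access, so lazy_module() is safe at the top of modules with heavy
# optional dependencies.
json = lazy_module('json')                 # no import happens here
assert json.loads('{"a": 1}') == {'a': 1}  # 'json' is imported on this access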
class DocTemplate:
    """
    Treat an open document format file (.ods, .odt, etc.) as a Jinja2
    template and execute a template transformation.
    """

    xmlns = {
        'text': 'urn:oasis:names:tc:opendocument:xmlns:text:1.0',
        'office': 'urn:oasis:names:tc:opendocument:xmlns:office:1.0',
        'table': 'urn:oasis:names:tc:opendocument:xmlns:table:1.0',
    }
    zipfile = lazy(lambda self: zipfile.ZipFile(self.path))

    def xml_tree(self):
        file = self.zipfile.open('content.xml')
        return ET.parse(file)

    def __init__(self, path):
        self.is_closed = False
        self.path = path

    def _check_open(self):
        if self.is_closed:
            raise RuntimeError('operation cannot be performed on a closed '
                               'file.')

    def render_template(self, namespace):
        """
        Apply the template to content.xml and return the rendered XML as a
        string.
        """
        self._check_open()
        xml_root = self.xml_tree().getroot()
        non_rendered = [xml_root.find('office:body', self.xmlns)]
        while non_rendered:
            node = non_rendered.pop()
            node[:] = render_node(node, namespace)
        return ET.tounicode(xml_root)

    def render_at(self, namespace, dest):
        """
        Render the template and save the result at the given destination.
        """
        self._check_open()
        data = self.render_template(namespace)

        with zipfile.ZipFile(dest, 'w') as zip:
            for file in self.zipfile.namelist():
                if file == 'content.xml':
                    continue
                with zip.open(file, 'w') as dest_file:
                    with self.zipfile.open(file) as src_file:
                        dest_file.write(src_file.read())
            with zip.open('content.xml', 'w') as F:
                F.write(data.encode('utf8'))
        self.close()

    def close(self):
        """
        Close the zipfile and flush all data to disk.
        """
        if not self.is_closed:
            self.zipfile.close()
            self.is_closed = True
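
# Hedged usage sketch (file names are illustrative): render an .odt
# template with a Jinja2 namespace and write the result to a new file.
#
#     template = DocTemplate('invoice-template.odt')
#     template.render_at({'customer': 'Acme', 'total': 42}, 'invoice.odt')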
class Introspection:
    """
    Introspection facilities for a transpyled Transpyler.
    """

    #
    # Original python names and constants
    #
    py_constants = ['True', 'False', 'None']

    @lazy
    def py_exceptions(self):
        return [
            name for (name, value) in vars(_builtins).items()
            if isinstance(value, type) and issubclass(value, Exception)
        ]

    @lazy
    def py_types(self):
        return [
            name for (name, value) in vars(_builtins).items()
            if isinstance(value, type) and not issubclass(value, Exception)
        ]

    @lazy
    def py_functions(self):
        return [
            name for (name, value) in vars(_builtins).items()
            if name not in self.py_types and name not in self.py_exceptions
        ]

    @lazy
    def py_builtins(self):
        return self.py_types + self.py_functions

    py_submodules = []
    py_keywords = []

    #
    # Names derived from the transpyler
    #
    namespace = lazy(lambda self: self.transpyler.namespace)
    all_names = lazy(lambda self: list(self.namespace))
    constants = lazy(lambda self: [
        name for (name, value) in self.namespace.items()
        if isinstance(value, (int, float, bool))
    ])
    exceptions = lazy(lambda self: [
        name for (name, value) in self.namespace.items()
        if isinstance(value, type) and issubclass(value, Exception)
    ])
    types = lazy(lambda self: [
        name for (name, value) in self.namespace.items()
        if isinstance(value, type) and not issubclass(value, Exception)
    ])
    functions = lazy(lambda self: [
        name for (name, value) in self.namespace.items()
        if not isinstance(value, type) and callable(value)
    ])
    submodules = lazy(lambda self: [
        name for (name, value) in self.namespace.items()
        if isinstance(value, ModuleType)
    ])
    builtins = lazy(lambda self: self.functions + self.types)
    keywords = lazy(lambda self: self._extract_keywords())

    #
    # Combined lists
    #
    all_constants = lazy(
        lambda self: unique(self.constants + self.py_constants))
    all_exceptions = lazy(
        lambda self: unique(self.exceptions + self.py_exceptions))
    all_types = lazy(lambda self: unique(self.types + self.py_types))
    all_functions = lazy(
        lambda self: unique(self.functions + self.py_functions))
    all_submodules = lazy(
        lambda self: unique(self.submodules + self.py_submodules))
    all_builtins = lazy(lambda self: unique(self.builtins + self.py_builtins))
    all_keywords = lazy(lambda self: unique(self.keywords + self.py_keywords))

    def __init__(self, transpyler):
        self.transpyler = transpyler

    def _extract_keywords(self):
        keywords = set()
        for item in self.transpyler.translations:
            if isinstance(item, str):
                keywords.add(item)
            else:
                keywords.update(item)
        return sorted(keywords)
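
# Hypothetical usage sketch, assuming a concrete transpyler instance such
# as the PyBr example from the Transpyler docstring:
#
#     introspection = Introspection(pybr)
#     introspection.keywords      # e.g. ['cada', 'em', 'faça', 'para']
#     introspection.all_builtins  # transpyler + Python builtins, deduplicated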
class VoteStats:
    """
    Compute statistics and indexes, and perform generic mathematical analysis
    of the votes and comments in a conversation.
    """

    n_users = lazy(lambda self: len(self.votes['user'].unique()))
    n_comments = lazy(lambda self: len(self.votes['comment'].unique()))
    shape = property(lambda self: self.votes.shape)

    def __init__(self, votes, n_users=None, n_comments=None):
        if not isinstance(votes, pd.DataFrame):
            votes = list(votes)
            votes = pd.DataFrame(votes, columns=['user', 'comment', 'choice'])

        keys = votes.keys()
        if not ('user' in keys and 'comment' in keys and 'choice' in keys):
            msg = (f'must be a dataframe with "user", "comment" and "choice" '
                   f'columns, got: {list(keys)}')
            raise ValueError(msg)

        self.votes = votes
        if n_users is not None:
            self.n_users = n_users
        if n_comments is not None:
            self.n_comments = n_comments

    @lazy
    def pivot_table(self):
        """
        Dataframe with users as index, comments as columns and votes as
        values.
        """
        votes = self.votes
        return votes.pivot_table(index='user', columns='comment',
                                 values='choice')

    # Dataframes with user and comment statistics
    def _datasets(self, which, n_max):
        data = self.votes
        return dict(
            n_votes=num_votes(data, which),
            n_skip=num_votes(data, which, choice=SKIP),
            n_agree=num_votes(data, which, choice=AGREE),
            n_disagree=num_votes(data, which, choice=DISAGREE),
            n_max=n_max,
            avg_all=average_vote(data, which),
            avg_valid=average_vote(data, which, drop_skip=True),
        )

    def comments(self, **kwargs):
        """
        Return a dataframe with information about the comments.
        """
        kwargs = dict(self._datasets('comment', self.n_users), **kwargs)
        return base_stats(**kwargs)

    def users(self, **kwargs):
        """
        Return a dataframe with statistics about the users.
        """
        data = self._datasets('user', self.n_comments)
        kwargs = dict(data, **kwargs)
        return base_stats(**kwargs)
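
# Runnable sketch with a hand-built vote list; AGREE, DISAGREE and SKIP are
# the choice constants this module already relies on in _datasets().
votes = [
    ('ana', 'c1', AGREE),
    ('ana', 'c2', DISAGREE),
    ('bob', 'c1', SKIP),
]
stats = VoteStats(votes)
assert (stats.n_users, stats.n_comments) == (2, 2)
stats.pivot_table  # users as rows, comments as columns, choices as values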
class PopulationBase(collections.Sequence, metaclass=abc.ABCMeta):
    """
    Base class for Population and MultiPopulation.

    Attrs:
        freqs:
            A list of :cls:`kpop.Prob` objects representing the probabilities
            of each loci.
        freqs_matrix:
            A full matrix with the shape (num individuals, max num of
            alleles) with the probability for each allele.
        freqs_vector:
            Frequencies for allele 1. This is more useful for biallelic data,
            since the frequency of the second allele is simply the
            complement.
        hfreqs_vector:
            Vector of frequencies of heterozygotes.
    """

    # General shape
    size = property(len)
    num_loci = lazy(lambda _: _[0].num_loci)
    ploidy = lazy(lambda _: _[0].ploidy)
    shape = property(lambda _: (_.size, _.num_loci, _.ploidy))
    data_size = fn_property(_.size * _.num_loci * _.ploidy)
    dtype = lazy(lambda _: np.dtype('uint8'))
    _shape_attrs = (
        'size', 'num_loci', 'ploidy', 'shape', 'data_size',
    )

    # Frequencies
    freqs = property(get_freqs, set_freqs)
    freqs_matrix = lazy(lambda _: freqs_to_matrix(_.freqs))
    freqs_vector = lazy(lambda _: np.ascontiguousarray(_.freqs_matrix[:, 0]))
    hfreqs_vector = lazy(hfreqs_vector)

    # Allele statistics
    allele_names = None
    is_biallelic = fn_lazy(_.num_alleles == 2)
    num_alleles = lazy(lambda _: max(max(freq) for freq in _.freqs))

    # Multi population
    is_multi_population = False
    num_populations = fn_property(lambda _: len(_.populations))

    # Missing data
    has_missing_data = property(lambda _: any(ind.has_missing for ind in _))
    missing_data_total = property(
        lambda _: sum(ind.missing_data_total for ind in _))
    missing_data_ratio = fn_property(_.missing_data_total / _.data_size)

    # Meta information
    individual_ids = lazy(lambda _: list(_.meta.index))

    # Special attributes. These will be inserted later via monkey patching
    populations = ()
    admixture = Admixture()
    clusterization = Clusterization()
    classification = Classification()
    io = Io()
    plot = Plot()
    projection = Projection()
    simulation = Simulation()
    statistics = Statistics()

    # Aliases
    admix = property(lambda self: self.admixture)
    cls = property(lambda self: self.classification)
    cluster = property(lambda self: self.clusterization)
    proj = property(lambda self: self.projection)
    sim = property(lambda self: self.simulation)
    stats = property(lambda self: self.statistics)

    # List of cacheable attributes
    _cacheable_attributes = (
        'has_missing', 'missing_total', 'missing_ratio',
        'is_biallelic', 'num_alleles',
        'admixture', 'clustering', 'classification', 'io', 'plot',
        'projection', 'simulation', 'statistics',
    )

    @classmethod
    def random(cls, size=0, num_loci=0, alleles=2, ploidy=2, id=None,
               seed=None):
        """
        Creates a new random population.

        Args:
            size:
                Number of individuals. If a list of numbers is given, creates
                a Multipopulation object with sub-populations of the assigned
                sizes.
            num_loci:
                Number of loci in the genotype.
            alleles:
                Number of alleles for all loci.
            ploidy:
                Ploidy of genotype.
            id:
                Optional population id.
            seed:
                Optional random seed, for reproducible results.

        Returns:
            A new population object.
        """
        if num_loci <= 0:
            raise ValueError('num_loci must be at least one!')

        is_multipopulation = isinstance(size, collections.Sequence)
        sizes = [size] if not is_multipopulation else size
        seeds = get_seeds(len(sizes), seed)

        # Create frequencies and data
        all_data = []
        all_freqs = [
            random_frequencies(num_loci, alleles, seed=k) for k in seeds
        ]
        for pre_seed, freqs, size in zip(seeds, all_freqs, sizes):
            data = []
            ind_seeds = get_seeds(size, pre_seed)
            for seed in ind_seeds:
                ind = random_individual_data(freqs, ploidy=ploidy, seed=seed)
                data.append(ind)
            all_data.append(np.array(data))

        # Return population
        if is_multipopulation:
            sub_populations = []
            for i in range(len(sizes)):
                id_i = None if id is None else '%s%s' % (id, i + 1)
                pop = kpop.Population(all_data[i],
                                      freqs=all_freqs[i],
                                      id=id_i,
                                      num_loci=num_loci,
                                      num_alleles=alleles,
                                      ploidy=ploidy)
                sub_populations.append(pop)
            return kpop.MultiPopulation(sub_populations, id=id)
        else:
            return kpop.Population(all_data[0],
                                   freqs=all_freqs[0],
                                   id=id,
                                   num_loci=num_loci,
                                   num_alleles=alleles,
                                   ploidy=ploidy)

    def __init__(self, freqs=None, allele_names=None, id=None, ploidy=None,
                 num_loci=None, num_alleles=None):
        # Normalize frequencies
        self._init_freqs(freqs)

        # Fix num_loci from data
        if self._freqs is not None:
            self.num_loci = len(self._freqs)
            if num_loci is not None and num_loci != self.num_loci:
                raise ValueError('invalid value for num_loci')
        elif num_loci is not None:
            self.num_loci = num_loci

        # Save required attributes
        self.allele_names = allele_names
        self.id = id

        # Save optional given lazy attributes
        if ploidy is not None:
            self.ploidy = ploidy
        if num_alleles is not None:
            self.num_alleles = num_alleles

    def _init_freqs(self, freqs):
        if freqs is None:
            self._freqs = None
        elif len(freqs) == 0:
            raise ValueError('cannot initialize from empty frequencies')
        elif isinstance(freqs[0], collections.Mapping):
            self._freqs = [Prob(p) for p in freqs]
        else:
            freqs = np.asarray(freqs)

            if freqs.ndim == 2:
                self._freqs = [Prob(dict(enumerate(p, 1))) for p in freqs]
                self.freqs_matrix = np.array(freqs)
                self.num_alleles = freqs.shape[1]
            elif freqs.ndim == 1:
                self._freqs = [Prob({1: p, 2: 1 - p}) for p in freqs]
                self.freqs_vector = np.array(freqs)
                self.freqs_matrix = fill_freqs_vector(self.freqs_vector)
                self.num_alleles = 2
            else:
                raise ValueError('invalid frequency data')

    def __repr__(self):
        return self.io.render(max_loci=20, max_ind=10)

    def __str__(self):
        return self.io.render()

    def __eq__(self, other):
        if not isinstance(other, PopulationBase):
            return NotImplemented
        if self.shape != other.shape:
            return False
        return all(x == y for x, y in zip(self, other))

    def __getitem__(self, idx):
        if isinstance(idx, int):
            return self._getitem_by_index(idx)
        elif isinstance(idx, str):
            return self._getitem_by_label(idx)
        elif isinstance(idx, slice):
            return self._getslice(idx)
        elif isinstance(idx, np.ndarray) and idx.dtype.kind == 'i':
            return self.keep_individuals(idx)
        elif isinstance(idx, np.ndarray) and idx.dtype.kind == 'b':
            idx = np.arange(self.size)[idx]
            return self.keep_individuals(idx)
        else:
            typename = idx.__class__.__name__
            raise TypeError('invalid index type: %s' % typename)

    def _getitem_by_label(self, key):
        idx = self.meta.index.get_loc(key)
        return self._getitem_by_index(idx)

    def _getitem_by_index(self, idx):
        raise NotImplementedError

    def _getslice(self, slice):
        item = self._getitem_by_index
        data = [item(i) for i in range(*slice.indices(self.size))]
        return kpop.Population(data, id=self.id)

    def _population(self, *args, **kwargs):
        from kpop import Population
        return Population(*args, **kwargs)

    def _clear_caches(self):
        discard_attrs(self, self._cacheable_attributes)

    def _as_array(self):
        raise NotImplementedError('must be implemented on subclasses')

    def as_array(self, which='raw'):
        """
        Convert to a numpy data array using the requested conversion method.
        This is a basic pre-processing step in many dimensionality reduction
        algorithms.

        Genotypes are categorical data and it usually doesn't make sense to
        treat the integer encoding used in kpop as ordinal data (there is no
        ordering implied when treating, say, allele 1 vs allele 2 vs
        allele 3).

        Conversion methods:
            * raw:
                A 3-dimensional array of (size, num_loci, ploidy) for raw
                genotype data. Each component represents the value of a
                single allele.
            * flat:
                Like raw, but flattens the last dimension into a
                (size, num_loci * ploidy) array. This creates a new feature
                per locus for each degree of ploidy in the data.
            * rflat:
                Flattened data, but the positions of the alleles at each
                locus are shuffled first. This is recommended if data does
                not carry reliable haplotype information.
            * raw-norm, flat-norm, rflat-norm:
                Normalized versions of the "raw", "flat", and "rflat"
                methods. All components are rescaled to zero mean and unity
                variance.
            * count:
                Forces conversion to biallelic data and counts the number of
                occurrences of the first allele. Most methods will require
                normalization, so you should probably consider a specific
                method such as count-norm, count-snp, etc.
            * count-norm:
                Normalized version of count, scaled to zero mean and unity
                variance.
            * count-snp:
                Normalizes each feature using the standard deviation expected
                under the assumption of Hardy-Weinberg equilibrium. This
                procedure is described in Patterson et al., "Population
                Structure and Eigenanalysis", and is recommended for SNPs
                subject to genetic drift.
            * count-center:
                Instead of normalizing, simply center the data by subtracting
                half the ploidy to place it into a symmetric range. This
                normalization puts data into a cube with a predictable origin
                and range. For diploid data, the components will be either
                -1, 0, or 1.

        Returns:
            An ndarray with transformed data.
        """
        data_converter = DataConverter(self._as_array())
        return data_converter(which)

    def find_non_biallelic(self):
        """
        Finds all non-biallelic loci in the population.
        """
        return self.statistics.non_biallelic()

    def drop_non_biallelic(self, **kwargs):
        """
        Creates a new population removing all non-biallelic loci.
        """
        bad_loci = self.find_non_biallelic()
        return self.drop_loci(bad_loci, **kwargs)

    def force_biallelic(self, **kwargs):
        """
        Return a new population with forced biallelic data.

        If a locus has more than 2 alleles, the most common allele is picked
        as allele 1 and the alternate allele 2 comprises all the others.
        """
        alleles_mapping = [biallelic_mapping(prob) for prob in self.freqs]
        return self.map_alleles(alleles_mapping, **kwargs)

    def sort_by_allele_freq(self, **kwargs):
        """
        Return a new population in which the index attributed to each allele
        in each locus is sorted by its frequency in the population. After
        that, allele 1 will be the most common, allele 2 the second most
        common, and so on.
        """
        alleles_mapping = [sorted_allele_mapping(prob) for prob in self.freqs]
        return self.map_alleles(alleles_mapping, **kwargs)

    @abc.abstractmethod
    def map_alleles(self, alleles_mapping, **kwargs):
        """
        Create a new population reorganizing all allele values by the given
        list of allele value mappings.

        Args:
            alleles_mapping:
                A list with num_loci elements. Each element must be a mapping
                from the old allele values to the new ones. If an element is
                an empty dictionary, no remapping is done.
        """
        raise NotImplementedError('must be implemented in subclasses')

    def drop_loci(self, indexes, **kwargs):
        """
        Create a new population with all loci in the given indexes removed.
        """
        indexes = set(indexes)
        keep = np.array([i for i in range(self.num_loci) if i not in indexes])
        return self.keep_loci(keep, **kwargs)

    def drop_individuals(self, indexes, **kwargs):
        """
        Creates a new population removing the individuals in the given
        indexes.
        """
        indexes = set(indexes)
        keep = np.array([i for i in range(self.size) if i not in indexes])
        return self.keep_individuals(keep, **kwargs)

    def find_missing_data(self, axis=0, thresh=0.0):
        """
        Return the indexes of all individuals or loci that have a proportion
        of missing data higher than the given threshold.

        Args:
            axis (0 or 1):
                If axis=0 or 'individuals' (default), it scans for
                individuals with the given minimum amount of missing data
                values. If axis=1 or 'loci', it scans for loci with the
                given minimum amount of missing data.
            thresh (float, between 0 and 1):
                The maximum proportion of missing data tolerated.

        Returns:
            An array of indexes.
        """
        missing = self._as_array() == 0

        if axis in (0, 'individuals'):
            mask = np.mean(missing, axis=(1, 2)) > thresh
            return np.arange(self.size)[mask]
        elif axis in (1, 'loci'):
            # The mask has one entry per locus, so index the locus range.
            mask = np.mean(missing, axis=(0, 2)) > thresh
            return np.arange(self.num_loci)[mask]
        else:
            raise ValueError('invalid value for axis: %r' % axis)

    def drop_missing_data(self, axis=0, thresh=0.0, **kwargs):
        """
        Drop all individuals or loci that have a proportion of missing data
        higher than the given threshold.

        Args:
            axis (0 or 1):
                If axis=0 or 'individuals' (default), it drops individuals
                with the given minimum amount of missing data values. If
                axis=1 or 'loci', it drops all loci with the given minimum
                amount of missing data.
            thresh (float, between 0 and 1):
                The maximum proportion of missing data tolerated.

        Returns:
            A new population.
        """
        indexes = self.find_missing_data(axis, thresh)

        if axis in (0, 'individuals'):
            return self.drop_individuals(indexes, **kwargs)
        else:
            return self.drop_loci(indexes, **kwargs)

    @abc.abstractmethod
    def keep_loci(self, indexes, **kwargs):
        """
        Creates a new population keeping only the loci in the given indexes.
        """
        raise NotImplementedError('must be implemented in subclasses')

    @abc.abstractmethod
    def keep_individuals(self, indexes, **kwargs):
        """
        Creates a new population keeping only the individuals in the given
        indexes.
        """
        raise NotImplementedError('must be implemented in subclasses')

    def shuffle_loci(self, **kwargs):
        """
        Return a copy with the contents of each locus shuffled.
        """
        pop = self.copy(**kwargs)
        for ind in pop:
            for loci in ind.data:
                np.random.shuffle(loci)
        return pop

    def copy(self, id=None):
        """
        Return a copy of the population.
        """
        new = copy.copy(self)
        new.populations = copy.copy(self.populations)
        new._clear_caches()
        if id is not None:
            new.id = id
        return new
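
# Hedged usage sketch: PopulationBase.random() plus the conversion and
# filtering helpers above (Population and MultiPopulation are the concrete
# kpop subclasses).
#
#     pop = kpop.Population.random(size=10, num_loci=50, seed=0)
#     pop = pop.drop_missing_data(axis=1, thresh=0.25)
#     matrix = pop.as_array('count-snp')   # (size, num_loci) float array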