def __setitem__(self, key, value):
    """Store ``value`` on this object as the attribute named by ``key``.

    When ``key`` is ``'c'`` followed by one or more characters that
    ``corpkit.process.is_number`` accepts individually, the leading ``'c'``
    characters are stripped and the value is stored under the remaining
    name. Every other key is used unchanged as the attribute name.

    :param key: attribute name; must be a string (``startswith`` is called
        on it)
    :param value: object to store
    """
    if key.startswith('c') and len(key) > 1:
        # Imported lazily: only keys shaped like 'c' + something need it.
        from corpkit.process import is_number
        if all(is_number(char) for char in key[1:]):
            # NOTE(review): this stores under the name *without* the 'c'
            # prefix, while the related __getitem__ fallbacks look up
            # 'c' + key -- confirm the asymmetry is intended.
            setattr(self, key.lstrip('c'), value)
            return
    setattr(self, key, value)
def makesafe(variabletext):
    """Derive a name from ``variabletext`` with unsafe characters removed.

    The text before the first ``'.'`` is taken, ``'-parsed'`` is removed
    from it, and every run of non-word characters or underscores is
    deleted. If ``corpkit.process.is_number`` accepts the result, ``'c'``
    is prepended to it.

    :param variabletext: a string, or an object whose ``name`` attribute is
        a string
    :returns: the cleaned name
    """
    import re
    from corpkit.process import is_number
    # Raw string: in a plain literal '\W' is an invalid escape sequence.
    variable_safe_r = re.compile(r'[\W_]+', re.UNICODE)
    try:
        txt = variabletext.name.split('.')[0].replace('-parsed', '')
    except AttributeError:
        # No usable ``name`` attribute: treat the argument itself as text.
        txt = variabletext.split('.')[0].replace('-parsed', '')
    variable_safe = variable_safe_r.sub('', txt)
    if is_number(variable_safe):
        variable_safe = 'c' + variable_safe
    return variable_safe
def __getitem__(self, key):
    """Allow slicing, integer indexing and lookup by name.

    * slice -- returns a ``Corpora`` built from the items at the selected
      positions.
    * int -- takes ``self.data[key]``, converts it with
      ``corpkit.process.makesafe`` and looks the resulting name up again.
    * anything else -- returned as the attribute of that name. If that
      lookup fails and ``corpkit.process.is_number`` accepts ``key``, the
      attribute ``'c' + key`` is returned instead.

    :returns: the matching item, or ``None`` when the name lookup fails and
        ``key`` is not a number
    :raises AttributeError: if the ``'c' + key`` fallback is missing too
    """
    if isinstance(key, slice):
        # Resolve start, stop and step against the current length.
        positions = range(*key.indices(len(self)))
        return Corpora([self[position] for position in positions])
    if isinstance(key, int):
        from corpkit.process import makesafe
        return self[makesafe(self.data[key])]
    try:
        # Direct __getattribute__ call, kept from the original lookup.
        return self.__getattribute__(key)
    except (AttributeError, TypeError):
        from corpkit.process import is_number
        if is_number(key):
            return self.__getattribute__('c' + key)
    # Unknown, non-numeric names fall through without raising.
    return None
def __getitem__(self, key):
    """Allow slicing, integer indexing and lookup by name of subcorpora.

    * slice -- returns a ``Datalist`` of the items at the selected
      positions, resolved against ``len(self.subcorpora)``.
    * int -- takes ``self.subcorpora[key]``, converts it with
      ``corpkit.process.makesafe`` and looks that name up in
      ``self.subcorpora``.
    * anything else -- returned as the attribute of that name on
      ``self.subcorpora``. If that lookup fails and
      ``corpkit.process.is_number`` accepts ``key``, the attribute
      ``'c' + key`` of ``self`` is returned instead.

    :returns: the matching item, or ``None`` when the name lookup fails and
        ``key`` is not a number
    :raises AttributeError: if the ``'c' + key`` fallback is missing too
    """
    if isinstance(key, slice):
        # Resolve start, stop and step against the number of subcorpora.
        positions = range(*key.indices(len(self.subcorpora)))
        return Datalist([self[position] for position in positions])
    if isinstance(key, int):
        from corpkit.process import makesafe
        return self.subcorpora[makesafe(self.subcorpora[key])]
    try:
        return self.subcorpora.__getattribute__(key)
    except (AttributeError, TypeError):
        from corpkit.process import is_number
        if is_number(key):
            # NOTE(review): the fallback reads from ``self`` although the
            # primary lookup reads from ``self.subcorpora`` -- confirm this
            # is intended.
            return self.__getattribute__('c' + key)
    # Unknown, non-numeric names fall through without raising.
    return None
def makesafe(variabletext, drop_datatype=True, hyphens_ok=False):
    """Derive a name from ``variabletext`` using only safe characters.

    The text before the first ``'.'`` is taken, spaces become underscores,
    and every character outside ``A-Za-z0-9_`` is removed. If
    ``corpkit.process.is_number`` accepts the result, ``'c'`` is prepended.

    :param variabletext: a string, or an object whose ``name`` attribute is
        a string
    :param drop_datatype: when true, remove ``'-parsed'`` from the name
    :param hyphens_ok: when true, hyphens are kept; otherwise they are
        turned into underscores
    :returns: the cleaned name
    """
    import re
    from corpkit.process import is_number

    disallowed = r'[^A-Za-z0-9_-]+' if hyphens_ok else r'[^A-Za-z0-9_]+'
    unsafe_chars = re.compile(disallowed, re.UNICODE)

    try:
        stem = variabletext.name.split('.')[0]
    except AttributeError:
        # No usable ``name`` attribute: treat the argument itself as text.
        stem = variabletext.split('.')[0]

    if drop_datatype:
        stem = stem.replace('-parsed', '')
    stem = stem.replace(' ', '_')
    if not hyphens_ok:
        stem = stem.replace('-', '_')

    cleaned = unsafe_chars.sub('', stem)
    return 'c' + cleaned if is_number(cleaned) else cleaned
def __getitem__(self, key):
    """Allow slicing, positional indexing and ordinary dict key access.

    * slice -- returns an ``Interrodict`` holding the (key, value) pairs at
      the selected positions, in order.
    * int -- returns the value at that position; negative positions count
      from the end. Integers are always treated as positions, never as
      dict keys.
    * anything else -- ordinary ``OrderedDict`` lookup. If that fails and
      ``corpkit.process.is_number`` accepts ``key``, the attribute
      ``'c' + key`` is returned instead.

    :returns: the matching item, or ``None`` when the key lookup fails and
        ``key`` is not a number
    :raises IndexError: if an integer position is out of range
    :raises AttributeError: if the ``'c' + key`` fallback is missing too
    """
    if isinstance(key, slice):
        # Slicing a list of the pairs applies start/stop/step the same way
        # as range(*key.indices(len(self))), and works on Python 3 where
        # keys() is not subscriptable.
        return Interrodict(OrderedDict(list(self.items())[key]))
    if isinstance(key, int):
        # List indexing raises IndexError rather than leaking StopIteration.
        return list(self.items())[key][1]
    try:
        return OrderedDict.__getitem__(self, key)
    except (KeyError, TypeError):
        from corpkit.process import is_number
        if is_number(key):
            return self.__getattribute__('c' + key)
    # Unknown, non-numeric keys fall through without raising.
    return None