class Sentence(Doc):
    """Sentence document holding an ``en`` text and a ``cn`` text.

    (Presumably an English sentence and its Chinese translation -- confirm
    against the data.)  ``hash`` is derived from ``en`` by :meth:`make_hash`
    and is refreshed by :meth:`before_save`.  ``include`` lists the words the
    sentence contains and is the field queried by
    :meth:`get_token_sentences`.
    """

    __database__ = DB_NAME
    __collection__ = sentences.name

    structure = {
        'hash': unicode,  # generated from 'en'
        'en': unicode,
        'cn': unicode,
        'include': Set(unicode),  # words contained in the sentence
        'votes': {unicode: int},  # word: vote
        'sources': Set(unicode),  # where the sentence came from
        'create_time': datetime,
        'modify_time': datetime,
    }
    required_fields = ['en', 'cn', 'hash']
    default_values = {}
    indexes = [
        {'fields': ['include']},
        {'fields': ['hash']},
    ]
    use_dot_notation = True

    @staticmethod
    def make_hash(en):
        """Return the MD5 hex digest of the normalized words of ``en``.

        Only runs of ASCII letters count as words; they are lower-cased and
        joined with single spaces, so punctuation, digits, case and spacing
        do not influence the hash.

        :param en: sentence text.
        :returns: unicode hex digest, or ``None`` when ``en`` is empty/None.
        """
        if not en:
            return None
        # Matches of [a-zA-Z]+ never carry whitespace, so no strip is needed.
        words = [word.lower() for word in re.findall('[a-zA-Z]+', en)]
        return unicode(md5(' '.join(words)).hexdigest())

    @staticmethod
    def get_token_sentences(token):
        """Return every sentence whose ``include`` contains ``token.en``.

        Results are sorted by ascending ``_id`` and returned as a list.
        """
        cursor = db.Sentence.find({'include': token.en}, sort=[('_id', 1)])
        return list(cursor)

    def update_include(self, include):
        """Merge ``include`` into ``self.include`` and return the result.

        Duplicates are dropped.  The current value may be a set, a list or
        empty/None, and ``include`` may be any iterable (or None); the
        previous ``self.include + include`` raised ``TypeError`` whenever the
        stored value was a non-empty set.

        :param include: iterable of words to add.
        :returns: the merged words as a list (also stored on the document).
        """
        merged = set(self.include or ())
        merged.update(include or ())
        # NOTE(review): the schema declares Set(unicode) but a list is stored
        # here, as the original code did -- confirm which type the framework
        # expects at save time.
        self.include = list(merged)
        return self.include

    def before_save(self):
        """Refresh ``hash`` and the timestamps ahead of a save.

        ``create_time`` is only set while the document has no ``_id`` yet.
        """
        # One timestamp so create_time and modify_time agree on first save.
        now = datetime.now()
        self.hash = Sentence.make_hash(self.en)
        self.modify_time = now
        if not self.get('_id'):
            self.create_time = now
class Group(OctobearDocument):
    """Schema for documents in the ``groups`` collection.

    A group has a required ``name`` plus two sets of ObjectIds; per the
    field comments these refer to Member and Grade documents.
    """

    __collection__ = 'groups'

    structure = {
        'name': basestring,
        'members': Set(ObjectId),  # Member
        'grades': Set(ObjectId),  # Grade
    }

    required_fields = ['name']

    # Group names must be unique.
    indexes = [
        {
            'fields': 'name',
            'unique': True,
            'ttl': 0,
        },
    ]
class Member(OctobearDocument):
    """Schema for documents in the ``members`` collection.

    ``sid``, ``role`` and ``registered`` are required; new documents default
    to role 0 and ``registered=False``.
    """

    __collection__ = 'members'

    structure = {
        'sid': int,
        'login': basestring,
        'name': basestring,
        'email': basestring,
        'github': basestring,
        'registered': bool,
        'time_registered': datetime.datetime,
        # 0: student; 1: reader; 2: ta; 3: instructor
        'role': IS(0, 1, 2, 3),
        'grades': Set(ObjectId),  # Set(Grade)
        'group': ObjectId,  # Group
    }

    required_fields = ['sid', 'role', 'registered']

    default_values = {
        'role': 0,
        'registered': False,
    }

    # ``sid`` must be unique.
    indexes = [
        {
            'fields': 'sid',
            'unique': True,
            'ttl': 0,
        },
    ]
class MyDoc(Document):
    """Document with a required text ``title`` and a set of text categories."""

    structure = {
        'title': six.text_type,
        'category': Set(six.text_type),
    }

    required_fields = ['title']
class MyDoc(Document):
    """Document whose only declared field is ``tags``, a set of ints."""

    structure = {
        "tags": Set(int),
    }
class MyDoc(Document):
    """Document with a required unicode ``title`` and a set of unicode categories."""

    structure = {
        'title': unicode,
        'category': Set(unicode),
    }

    required_fields = ['title']
class MyDoc(Document):
    """Document with a required ``str`` title and a set of ``str`` categories."""

    structure = {
        'title': str,
        'category': Set(str),
    }

    required_fields = ['title']
class Token(Doc):
    """Vocabulary entry (a word or a phrase) keyed by its ``en`` text.

    :meth:`before_save` keeps ``hash``, ``spells``, the ``word``/``phrase``
    tag and the timestamps in sync with ``en``.
    """

    __database__ = DB_NAME
    __collection__ = tokens.name

    structure = {
        'hash': unicode,  # lower case 'en', auto modify on save
        'en': unicode,  # from qiji
        '_en': Set(unicode),  # ill formed en, maybe alias
        'freq': float,  # 0.0 - 1.0, -1 for empty
        'spells': Set(unicode),  # eg: mr mr. Mr Mr.
        'phs': Set(unicode),  # from iciba(best)
        'exp': {
            'core': unicode,
            'cn': [{
                'pos': [unicode],
                'text': unicode
            }],
        },
        'courses': Set(unicode),  # course names
        'tags': Set(unicode),  # trash, word/phrase
        'note': unicode,  # remark field
        'modify_time': datetime,  # auto modify on save
        'create_time': datetime,  # auto modify on save
    }
    default_values = {
        'freq': -1.0,
        'exp.cn': [],
        'exp.core': u'',
        'note': u'',
        '_en': set(),
    }
    indexes = [
        {'fields': ['en']},
        {'fields': ['hash']},
        {'fields': ['courses']},
        {'fields': ['tags']},
    ]
    use_dot_notation = True
    schemeless = True

    def sentences(self):
        """Return, as a list, the Sentence documents whose ``include`` has ``self.en``."""
        return list(db.Sentence.find({'include': self.en}))

    @staticmethod
    def get_token(en):
        """Return the Token whose ``en`` equals ``unicode(en)``.

        The value is whatever ``find_one`` yields when nothing matches
        (presumably ``None`` -- confirm against the driver).
        """
        return db.Token.find_one({'en': unicode(en)})

    @classmethod
    def normalize(cls, en):
        """Return ``en`` with ellipses, ``sb``/``sth`` and spacing normalized.

        * a run of two or more dots becomes ``' ... '``;
        * a standalone ``sb`` / ``sb.`` (any case) becomes ``sb.``;
        * a standalone ``sth`` / ``sth.`` (any case) becomes ``sth.``;
        * runs of two or more whitespace characters collapse to one space;
        * leading and trailing whitespace is stripped.

        "Standalone" means not adjacent to a word character.  Lookarounds are
        used so the neighbouring character is not consumed: with the previous
        ``(\\W|\\A)...(\\Z|\\W)`` groups, two placeholders separated by a
        single character (e.g. ``"sb sb"``) left the second one untouched.
        """
        # Put spaces on both sides of an ellipsis.
        en = re.sub(r'\.\.+', ' ... ', en)
        # sb/sb. -> sb.
        en = re.sub(r'(?i)(?<!\w)sb\.?(?!\w)', 'sb.', en)
        # sth/sth. -> sth.
        en = re.sub(r'(?i)(?<!\w)sth\.?(?!\w)', 'sth.', en)
        # Collapse redundant consecutive whitespace.
        en = re.sub(r'\s\s+', ' ', en)
        return en.strip()

    @classmethod
    def make_hash(cls, en):
        """Return the lookup hash for ``en``: stripped and lower-cased."""
        return en.strip().lower()

    def trash(self, flag):
        """Add the ``trash`` tag when ``flag`` is truthy, otherwise remove it."""
        if flag:
            self.tags.add(u'trash')
        else:
            self.tags.discard(u'trash')

    def before_save(self):
        """Bring derived fields in line with ``en`` ahead of a save.

        Strips ``en``, records it in ``spells``, recomputes ``hash``, coerces
        ``freq`` to float, re-tags the token as ``word`` or ``phrase`` and
        updates the timestamps (``create_time`` only while there is no
        ``_id``).
        """
        self.en = self.en.strip()
        self.spells.add(self.en)
        self.hash = Token.make_hash(self.en)
        self.freq = float(self.freq)

        # Anything containing a dot, a space or a slash counts as a phrase.
        is_phrase = any(sep in self.en for sep in ('.', ' ', '/'))
        kind = u'phrase' if is_phrase else u'word'
        self.tags.difference_update([u'word', u'phrase'])
        self.tags.add(kind)

        # One timestamp so create_time and modify_time agree on first save.
        now = datetime.now()
        self.modify_time = now
        if not self.get('_id'):
            self.create_time = now