def setUp(self):
    """Connect to the local PostgreSQL ``dev`` database before each test.

    The connection URI is assembled from the hard-coded user name and
    password, the DAL is pointed at the ``../databases`` folder (joined
    onto the current directory) and ``import_table_definitions`` is then
    called on that same folder.  The connection is kept in
    ``self.db_test``.
    """
    user, password = 'postgres', '1234'
    uri = 'postgres://{}:{}@localhost/dev'.format(user, password)
    databases_dir = path.join(path.curdir, "../databases")
    connection = DAL(uri, folder=databases_dir)
    # Store the connection before importing, as the instance attribute is
    # what the other test helpers use.
    self.db_test = connection
    connection.import_table_definitions(databases_dir)
def __init__(self, path = None, isInit = False, isReset = False):
    """Open the ITM database through the DAL.

    path    = Folder handed to the DAL as ``folder``; omitted when None
    isInit  = When true, migrations are switched on and lazy table
              definition is switched off
    isReset = Stored on the instance; not used by the constructor itself
    """
    self.isInit = isInit
    self.isReset = isReset
    dal_options = {
        'lazy_tables': not self.isInit,
        'migrate': self.isInit,
    }
    # Only pass ``folder`` when the caller supplied one, so the DAL keeps
    # its own default otherwise.
    if path is not None:
        dal_options['folder'] = path
    self.db = DAL(ITM_DB.CONNECTION, **dal_options)
def __init__(self, path = None, isInit = False, isImport = False, isReset = False):
    """Open the corpus database through the DAL.

    path     = Folder handed to the DAL as ``folder``; omitted when None
    isInit   = Stored on the instance; enables migrations when true
    isImport = Stored on the instance; enables migrations when true
    isReset  = Stored on the instance; not used by the constructor itself

    Migrations are enabled (and lazy table definition disabled) when
    either isInit or isImport is true.
    """
    self.isInit = isInit
    self.isImport = isImport
    self.isReset = isReset
    writable_schema = self.isInit or self.isImport
    dal_options = {
        'lazy_tables': not writable_schema,
        'migrate_enabled': writable_schema,
    }
    # Only pass ``folder`` when the caller supplied one.
    if path is not None:
        dal_options['folder'] = path
    self.db = DAL(Corpus_DB.CONNECTION, **dal_options)
def __init__(self, path=None, isInit=False):
    """Open the bag-of-words database through the DAL.

    path   = Folder handed to the DAL as ``folder``; omitted when None
    isInit = When true, migrations are enabled and lazy table definition
             is disabled
    """
    self.isInit = isInit
    dal_options = {
        'lazy_tables': not self.isInit,
        'migrate_enabled': self.isInit,
    }
    # Only pass ``folder`` when the caller supplied one.
    if path is not None:
        dal_options['folder'] = path
    self.db = DAL(BOW_DB.CONNECTION, **dal_options)
class ITM_DB():
    """Context-managed wrapper around the ``itm.db`` SQLite database.

    Entering the context defines the ``options`` table (and calls Reset
    when ``isReset`` is set); leaving it commits the connection.
    """

    FILENAME = 'itm.db'
    CONNECTION = 'sqlite://{}'.format(FILENAME)
    DEFAULT_OPTIONS = {}

    def __init__(self, path = None, isInit = False, isReset = False):
        """
        path    = Folder handed to the DAL as ``folder``; omitted when None
        isInit  = When true, migrations are on and lazy tables are off
        isReset = When true, Reset() is called on entering the context
        """
        self.isInit = isInit
        self.isReset = isReset
        if path is not None:
            self.db = DAL(ITM_DB.CONNECTION, lazy_tables = not self.isInit, migrate = self.isInit, folder = path)
        else:
            self.db = DAL(ITM_DB.CONNECTION, lazy_tables = not self.isInit, migrate = self.isInit)

    def __enter__(self):
        """Define the options table, optionally reset, and return self."""
        self.DefineOptionsTable()
        if self.isReset:
            self.Reset()
        return self

    def __exit__(self, type, value, traceback):
        """Commit the connection; the exception arguments are not inspected."""
        self.db.commit()

################################################################################

    def DefineOptionsTable(self):
        """Define the key/value ``options`` table and seed it when empty."""
        self.db.define_table( 'options',
            Field( 'key'  , 'string', required = True, unique = True ),
            Field( 'value', 'string', required = True ),
            migrate = self.isInit
        )
        if self.db(self.db.options).count() == 0:
            # .items() works on both Python 2 and 3; .iteritems() is
            # Python 2 only and raises AttributeError on Python 3.
            for key, value in ITM_DB.DEFAULT_OPTIONS.items():
                self.db.options.insert( key = key, value = value )

    def SetOption(self, key, value):
        """Insert the option, or update its value if the key already exists."""
        keyValue = self.db( self.db.options.key == key ).select().first()
        if keyValue:
            keyValue.update_record( value = value )
        else:
            self.db.options.insert( key = key, value = value )

    def GetOption(self, key):
        """Return the stored value for ``key``, or None when no row matches."""
        keyValue = self.db( self.db.options.key == key ).select( self.db.options.value ).first()
        if keyValue:
            return keyValue.value
        else:
            return None

    def Reset(self):
        """Placeholder: this database currently has nothing to clear."""
        pass
def __init__(self, path=None, isInit=False, isImport=False, isReset=False):
    """Open the corpus database through the DAL.

    path     = Folder handed to the DAL as ``folder``; omitted when None
    isInit   = Stored on the instance; enables migrations when true
    isImport = Stored on the instance; enables migrations when true
    isReset  = Stored on the instance; not used by the constructor itself

    Migrations are enabled (and lazy table definition disabled) when
    either isInit or isImport is true.
    """
    self.isInit = isInit
    self.isImport = isImport
    self.isReset = isReset
    schema_may_change = self.isInit or self.isImport
    connect_args = dict(lazy_tables=not schema_may_change,
                        migrate_enabled=schema_may_change)
    # Only pass ``folder`` when the caller supplied one.
    if path is not None:
        connect_args.update(folder=path)
    self.db = DAL(Corpus_DB.CONNECTION, **connect_args)
def copy_db(env, db_name='db', db_link='sqlite:memory:'):
    """Clone the table definitions of ``env[db_name]`` into a new DAL.

    env     = Mapping that holds the source database under ``db_name``
    db_name = Key of the source database inside ``env``
    db_link = Connection URI for the new database

    Every table of the source is re-defined on the new connection from
    shallow copies of its fields, with ``migrate=True``.  Returns the new
    DAL instance.  Raises ImportError when ``gluon`` cannot be imported.
    """
    try:
        from gluon.sql import DAL
    except ImportError:
        raise ImportError(
            " No module named `gluon`, unable to find `sql.DAL` "
            "Make sure you have the correct URI that is the main web2py directory. "
            "This should include your `applications` and `gluon` directories."
        )

    test_db = DAL(db_link)
    source_db = env[db_name]
    for tablename in source_db.tables:
        cloned_fields = [copy(field) for field in source_db[tablename]]
        test_db.define_table(tablename, *cloned_fields, migrate=True)
    return test_db
class TestModuleSetup(unittest.TestCase):
    """Base test case that connects to the local PostgreSQL ``dev`` database."""

    def setUp(self):
        """Open the DAL connection and load the stored table definitions.

        The connection is kept in ``self.db_test``; both the DAL folder
        and the imported table definitions come from ``../databases``
        joined onto the current directory.
        """
        user, password = 'postgres', '1234'
        uri = 'postgres://{}:{}@localhost/dev'.format(user, password)
        databases_dir = path.join(path.curdir, "../databases")
        connection = DAL(uri, folder=databases_dir)
        self.db_test = connection
        connection.import_table_definitions(databases_dir)

    def limpa_dados_tabela(self, nome_tabela):
        """Delete every row of table ``nome_tabela`` and commit.

        The table name is concatenated straight into the SQL statement,
        so it must come from trusted test code.
        """
        statement = 'delete from ' + nome_tabela
        connection = self.db_test
        connection.executesql(statement)
        connection.commit()
def __init__(self, path = None, isInit = False):
    """Open the corpus database through the DAL.

    path   = Folder handed to the DAL as ``folder``; omitted when None
    isInit = When true, migrations are enabled and lazy table definition
             is disabled
    """
    self.isInit = isInit
    connect_args = dict(lazy_tables = not self.isInit,
                        migrate_enabled = self.isInit)
    # Only pass ``folder`` when the caller supplied one.
    if path is not None:
        connect_args.update(folder = path)
    self.db = DAL(Corpus_DB.CONNECTION, **connect_args)
"""
This file is part of the Nervatura Framework
http://www.nervatura.com
Copyright © 2011-2015, Csaba Kappel
License: LGPLv3
http://www.nervatura.com/nerva2py/default/licenses
"""

# The condition is the literal 0, so this block never executes.
# NOTE(review): presumably it only exists so editors / static analysers can
# resolve the names web2py injects at runtime - confirm.
if 0:
    global request; request = globals.Request()
    from gluon.globals import Session
    global session; session = Session()
    global response; response = globals.Response()
    import gluon.languages.translator as T
    from gluon.sql import DAL
    global db; db = DAL()

from gluon.html import URL
from nerva2py.nervastore import NervaStore
from gluon.html import TABLE, TR, TD
from gluon.sqlhtml import SPAN, A

# Connection string formats:
#postgres://username:password@localhost/database
#mysql://username:password@localhost/database
#sqlite://database.db
conStr="sqlite://demo.db"

# Build the store from the request/session/translator objects and connect it
# to the database named by conStr.
ns = NervaStore(request, session, T, None)
ns.engine = "sqlite"
ns.connect.setConnect(uri=conStr, pool_size=0, createdb=False)
if ns.db:
class MultipleLDA_DB():
    """Context-managed wrapper around the ``multiple_lda.db`` SQLite database.

    Entering the context defines the options, dimension (terms, docs,
    topics), matrix (term-topic, doc-topic) and pairwise topic statistics
    tables; leaving it commits the connection.  Tables are migrated and
    indexes are created only when the instance was built with
    ``isInit = True``.
    """

    FILENAME = 'multiple_lda.db'
    CONNECTION = 'sqlite://{}'.format(FILENAME)
    DEFAULT_OPTIONS = {
        'max_co_topic_count' : 10000   # Number of topic pairs to store
    }

    def __init__(self, path = None, isInit = False):
        """
        path   = Folder handed to the DAL as ``folder``; omitted when None
        isInit = When true, migrations are on and lazy tables are off
        """
        self.isInit = isInit
        if path is not None:
            self.db = DAL(MultipleLDA_DB.CONNECTION, lazy_tables = not self.isInit, migrate = self.isInit, folder = path)
        else:
            self.db = DAL(MultipleLDA_DB.CONNECTION, lazy_tables = not self.isInit, migrate = self.isInit)

    def __enter__(self):
        """Define every table and return self."""
        self.DefineOptionsTable()
        self.DefineDimensionTables()
        self.DefineMatrixTables()
        self.DefineStatsTables()
        return self

    def __exit__(self, type, value, traceback):
        """Commit the connection; the exception arguments are not inspected."""
        self.db.commit()

################################################################################

    def _CreateIndexes(self, table, indexes):
        """Create indexes on ``table``; does nothing unless isInit is set.

        indexes = Sequence of (index_name, column_list, is_unique) tuples,
                  where column_list is the comma-separated column text
                  placed between the parentheses of the statement
        """
        if not self.isInit:
            return
        for index_name, columns, is_unique in indexes:
            kind = 'UNIQUE INDEX' if is_unique else 'INDEX'
            self.db.executesql( 'CREATE {} IF NOT EXISTS {} ON {} ({});'.format( kind, index_name, table, columns ) )

################################################################################

    def DefineOptionsTable(self):
        """Define the key/value ``options`` table; seed defaults on init."""
        self.db.define_table( 'options',
            Field( 'key'  , 'string', required = True, unique = True ),
            Field( 'value', 'string', required = True ),
            migrate = self.isInit
        )
        if self.isInit:
            # Seed from this class's own defaults.  The previous code read
            # LDA_DB.DEFAULT_OPTIONS (a different class) and used the
            # Python 2-only iteritems().
            for key, value in MultipleLDA_DB.DEFAULT_OPTIONS.items():
                self.SetOption( key, value )

    def SetOption(self, key, value):
        """Update the option when its key exists, otherwise insert it."""
        where = self.db.options.key == key
        if self.db( where ).count() > 0:
            self.db( where ).update( value = value )
        else:
            self.db.options.insert( key = key, value = value )

    def GetOption(self, key):
        """Return the stored value for ``key``, or None when no row matches."""
        where = self.db.options.key == key
        keyValue = self.db( where ).select( self.db.options.value ).first()
        if keyValue:
            return keyValue.value
        else:
            return None

################################################################################

    def DefineDimensionTables(self):
        """Define the ``terms``, ``docs`` and ``topics`` tables and indexes."""
        self.db.define_table( 'terms',
            Field( 'term_index', 'integer', required = True, unique = True, default = -1 ),
            Field( 'term_text' , 'string' , required = True, unique = True ),
            Field( 'term_freq' , 'double' , required = True ),
            Field( 'rank'      , 'integer', required = True ),
            migrate = self.isInit
        )
        self._CreateIndexes( 'terms', [
            ( 'terms_index', 'term_index', True  ),
            ( 'terms_text' , 'term_text' , True  ),
            ( 'terms_freq' , 'term_freq' , False ),
            ( 'terms_rank' , 'rank'      , False ),
        ] )

        self.db.define_table( 'docs',
            Field( 'doc_index', 'integer', required = True, unique = True, default = -1 ),
            Field( 'doc_id'   , 'string' , required = True, unique = True ),
            Field( 'doc_freq' , 'double' , required = True ),
            Field( 'rank'     , 'integer', required = True ),
            migrate = self.isInit
        )
        self._CreateIndexes( 'docs', [
            ( 'docs_index', 'doc_index', True  ),
            ( 'docs_id'   , 'doc_id'   , True  ),
            ( 'docs_freq' , 'doc_freq' , False ),
            ( 'docs_rank' , 'rank'     , False ),
        ] )

        self.db.define_table( 'topics',
            Field( 'entry_index', 'integer'     , required = True, default = -1 ),
            Field( 'topic_index', 'integer'     , required = True, default = -1 ),
            Field( 'topic_freq' , 'double'      , required = True ),
            Field( 'topic_label', 'string'      , required = True ),
            Field( 'topic_desc' , 'string'      , required = True ),
            Field( 'top_terms'  , 'list:integer', required = True ),
            Field( 'top_docs'   , 'list:integer', required = True ),
            Field( 'rank'       , 'integer'     , required = True ),
            migrate = self.isInit
        )
        self._CreateIndexes( 'topics', [
            ( 'topics_indexes'  , 'entry_index, topic_index', True  ),
            ( 'topics_freq'     , 'topic_freq'              , False ),
            ( 'topics_rank'     , 'rank'                    , False ),
            ( 'topics_freqEntry', 'entry_index, topic_freq' , False ),
            ( 'topics_rankEntry', 'entry_index, rank'       , False ),
        ] )

    def _DefineTopicMatrixTable(self, table, prefix, item):
        """Define one (entry, item, topic) -> value/rank table and its indexes.

        table  = Table name, e.g. 'term_topic_matrix'
        prefix = Prefix shared by the table's index names, e.g. 'term_topic'
        item   = 'term' or 'doc'; yields the '<item>_index' column
        """
        item_index = '{}_index'.format( item )
        self.db.define_table( table,
            Field( 'entry_index', 'integer', required = True, default = -1 ),
            Field( item_index   , 'integer', required = True, default = -1 ),
            Field( 'topic_index', 'integer', required = True, default = -1 ),
            Field( 'value', 'double' , required = True ),
            Field( 'rank' , 'integer', required = True ),
            migrate = self.isInit
        )
        self._CreateIndexes( table, [
            ( '{}_indexes'.format( prefix )              , 'entry_index, {}, topic_index'.format( item_index ), True  ),
            ( '{}_value'.format( prefix )                , 'value'                                           , False ),
            ( '{}_rank'.format( prefix )                 , 'rank'                                            , False ),
            ( '{}_{}index'.format( prefix, item )        , item_index                                        , False ),
            ( '{}_topicindex'.format( prefix )           , 'topic_index'                                     , False ),
            ( '{}_valueEntry'.format( prefix )           , 'entry_index, value'                              , False ),
            ( '{}_rankEntry'.format( prefix )            , 'entry_index, rank'                               , False ),
            ( '{}_{}indexEntry'.format( prefix, item )   , 'entry_index, {}'.format( item_index )            , False ),
            ( '{}_topicindexEntry'.format( prefix )      , 'entry_index, topic_index'                        , False ),
        ] )

    def DefineMatrixTables(self):
        """Define the ``term_topic_matrix`` and ``doc_topic_matrix`` tables."""
        self._DefineTopicMatrixTable( 'term_topic_matrix', 'term_topic', 'term' )
        self._DefineTopicMatrixTable( 'doc_topic_matrix' , 'doc_topic' , 'doc'  )

    def _DefineTopicPairTable(self, table):
        """Define one pairwise topic statistics table and its indexes.

        Each row is keyed by two (entry_index, topic_index) pairs and
        carries a value and a rank.
        """
        self.db.define_table( table,
            Field( 'first_entry_index' , 'integer', required = True, default = -1 ),
            Field( 'first_topic_index' , 'integer', required = True, default = -1 ),
            Field( 'second_entry_index', 'integer', required = True, default = -1 ),
            Field( 'second_topic_index', 'integer', required = True, default = -1 ),
            Field( 'value', 'double' , required = True ),
            Field( 'rank' , 'integer', required = True ),
            migrate = self.isInit
        )
        first = 'first_entry_index, first_topic_index'
        second = 'second_entry_index, second_topic_index'
        self._CreateIndexes( table, [
            ( '{}_indexes'.format( table )    , '{}, {}'.format( first, second ), True  ),
            ( '{}_value'.format( table )      , 'value'                         , False ),
            ( '{}_rank'.format( table )       , 'rank'                          , False ),
            ( '{}_valueFirst'.format( table ) , '{}, value'.format( first )     , False ),
            ( '{}_rankFirst'.format( table )  , '{}, rank'.format( first )      , False ),
            ( '{}_valueSecond'.format( table ), '{}, value'.format( second )    , False ),
            ( '{}_rankSecond'.format( table ) , '{}, rank'.format( second )     , False ),
        ] )

    def DefineStatsTables(self):
        """Define the ``topic_cossim``, ``topic_kldiv`` and ``topic_rdp`` tables."""
        for table in ( 'topic_cossim', 'topic_kldiv', 'topic_rdp' ):
            self._DefineTopicPairTable( table )

################################################################################

    def Reset(self):
        """Delete all rows from every table except ``options``."""
        self.db.executesql( 'DELETE FROM terms;' )
        self.db.executesql( 'DELETE FROM docs;' )
        self.db.executesql( 'DELETE FROM topics;' )
        self.db.executesql( 'DELETE FROM term_topic_matrix;' )
        self.db.executesql( 'DELETE FROM doc_topic_matrix;' )
        self.db.executesql( 'DELETE FROM topic_cossim;' )
        self.db.executesql( 'DELETE FROM topic_kldiv;' )
        self.db.executesql( 'DELETE FROM topic_rdp;' )
DEMO_MODE = False
response.google_analytics_id = None

if not request.env.web2py_runtime_gae:
    # Not on Google App Engine: use SQLite files.  The data folder is the
    # first existing directory among ../../data, ../../databases, ../data
    # and ../databases; it stays None when none of them exists.
    request.data_folder = None
    if os.path.isdir(os.path.join("..", "..", "data")):
        request.data_folder = os.path.join("..", "..", "data")
    elif os.path.isdir(os.path.join("..", "..", "databases")):
        request.data_folder = os.path.join("..", "..", "databases")
    elif os.path.isdir(os.path.join("..", "data")):
        request.data_folder = os.path.join("..", "data")
    elif os.path.isdir(os.path.join("..", "databases")):
        request.data_folder = os.path.join("..", "databases")
    ename = "sqlite"
    # Application data and sessions live in two separate SQLite files.
    db = DAL("sqlite://storage.sqlite", migrate=False, fake_migrate=False, folder=request.data_folder)
    session_db = DAL("sqlite://session.sqlite", folder=request.data_folder)
    session.connect(request, response, db=session_db)
    # reload(sys) is needed to reach sys.setdefaultencoding on Python 2.
    reload(sys)
    sys.setdefaultencoding("utf-8")  # @UndefinedVariable
else:
    # On Google App Engine: the datastore holds both data and sessions.
    ename = "google_datastore"
    # db = DAL('google:datastore://storage', migrate=False, fake_migrate=False)
    db = DAL("google:datastore", adapter_args={"ndb_settings": None, "use_ndb": False})
    session.connect(request, response, db=db)
    # from gluon.contrib.memdb import MEMDB
    # from google.appengine.api.memcache import Client
    # session.connect(request, response, db = MEMDB(Client()))

# Generic views are allowed for every pattern on local requests only.
response.generic_patterns = ["*"] if request.is_local else []
try:
class Corpus_DB(): FILENAME = 'corpus.db' CONNECTION = 'sqlite://{}'.format(FILENAME) DOC_IDS = ['doc_id', 'docid'] DOC_CONTENTS = ['doc_content', 'doccontent', 'docbody', 'doc_body'] DEFAULT_OPTIONS = { 'token_regex': r'\w{3,}', 'min_freq': 5, 'min_doc_freq': 3, 'max_freq_count': 4000, 'max_co_freq_count': 160000, 'max_g2_count': 160000 } def __init__(self, path=None, isInit=False, isImport=False, isReset=False): self.isInit = isInit self.isImport = isImport self.isReset = isReset isInitOrImport = self.isInit or self.isImport if path is not None: self.db = DAL(Corpus_DB.CONNECTION, lazy_tables=not isInitOrImport, migrate_enabled=isInitOrImport, folder=path) else: self.db = DAL(Corpus_DB.CONNECTION, lazy_tables=not isInitOrImport, migrate_enabled=isInitOrImport) def __enter__(self): self.DefineOptionsTable() self.DefineModelsTable() self.DefineCorpusTable() self.DefineMetadataTables() if self.isReset: self.Reset() self.DefineTermStatsTables() self.DefineTermCoStatsTables() self.DefineSentenceCoStatsTables() self.DefineTemporaryTable() return self def __exit__(self, type, value, traceback): self.DefineCorpusTextSearch() self.db.commit() ################################################################################ def DefineOptionsTable(self): self.db.define_table('options', Field('key', 'string', required=True, unique=True), Field('value', 'string', required=True), migrate=self.isImport) if self.isInit: for key, value in Corpus_DB.DEFAULT_OPTIONS.iteritems(): keyValue = self.db(self.db.options.key == key).select().first() if keyValue: keyValue.update_record(value=value) else: self.db.options.insert(key=key, value=value) def SetOption(self, key, value): keyValue = self.db(self.db.options.key == key).select().first() if keyValue: keyValue.update_record(value=value) else: self.db.options.insert(key=key, value=value) def GetOption(self, key): keyValue = self.db(self.db.options.key == key).select( self.db.options.value).first() if keyValue: return keyValue.value else: 
return None def DefineModelsTable(self): self.db.define_table('models', Field('model_key', 'string', required=True, unique=True), Field('model_desc', 'string', required=True), migrate=self.isImport) def AddModel(self, model_key, model_desc): keyDesc = self.db( self.db.models.model_key == model_key).select().first() if keyDesc: keyDesc.update_record(model_desc=model_desc) else: self.db.models.insert(model_key=model_key, model_desc=model_desc) def GetModels(self): models = self.db(self.db.models).select(self.db.models.model_key) return [model.model_key for model in models] def GetModelDescription(self, model_key): model = self.db(self.db.models.model_key == model_key).select().first() if model: return model.model_desc else: return None ################################################################################ def DefineCorpusTable(self): self.db.define_table('corpus', Field('doc_index', 'integer', required=True, unique=True, default=-1), Field('doc_id', 'string', required=True, unique=True), Field('doc_content', 'text', required=True), migrate=self.isImport) if self.isImport: self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS corpus_doc_index ON corpus (doc_index);' ) self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS corpus_doc_id ON corpus (doc_id);' ) def DefineMetadataTables(self): self.db.define_table('fields', Field('field_index', 'integer', required=True, unique=True, default=-1), Field('field_name', 'string', required=True, unique=True), Field('field_type', 'string', required=True), migrate=self.isImport) if self.isImport: self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS field_index ON fields (field_index);' ) self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS field_name ON fields (field_name);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS field_type ON fields (field_type);' ) self.db.define_table('metadata', Field('doc_index', 'integer', required=True, default=-1), Field('field_index', 'integer', required=True, default=-1), 
Field('value', 'text', required=True), migrate=self.isImport) if self.isImport: self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS metadata_indexes ON metadata (doc_index, field_index);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS metadata_doc_index ON metadata (doc_index);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS metadata_field_index ON metadata (field_index);' ) ################################################################################ def DefineCorpusTextSearch(self): if self.isImport: self.db.executesql('DROP TABLE IF EXISTS corpus_search;') self.db.executesql( 'CREATE VIRTUAL TABLE corpus_search USING fts3 (doc_content TEXT);' ) self.db.executesql( 'INSERT INTO corpus_search (rowid, doc_content) SELECT doc_index, doc_content FROM corpus;' ) ################################################################################ def ImportFromFile(self, filename): """ filename = A plain-text file (utf-8 encoded) containing one document per line """ def ReadFile(): with open(filename, 'r') as f: for index, line in enumerate(f): doc_index = index values = line.decode('utf-8', 'ignore').rstrip('\n').split('\t') if len(values) == 1: doc_id = 'doc{}'.format(doc_index + 1) doc_content = values[0] else: doc_id = values[0] doc_content = values[1] yield { 'doc_index': doc_index, 'doc_id': doc_id, 'doc_content': doc_content.encode('utf-8') } self.db.corpus.bulk_insert(ReadFile()) def ImportFromFolder(self, glob_pattern): """ glob_pattern = A folder of files or a glob pattern for list of files (utf-8 encoded, one document per file) """ def ReadFolder(): filenames = sorted(glob.glob(glob_pattern)) for index, filename in enumerate(filenames): doc_index = index doc_id = filename with open(filename, 'r') as f: doc_content = f.read().decode('utf-8', 'ignore') yield { 'doc_index': doc_index, 'doc_id': doc_id, 'doc_content': doc_content.encode('utf-8') } self.db.corpus.bulk_insert(ReadFolder()) def ImportFromSpreadsheet(self, filename, id_key=None, 
content_key=None, is_csv=False): """ filename = A tab- or comman-separated spreadsheet (utf-8 encoded, with a header row containing column names) id_key = Name of the column containing unique document IDs content_key = Name of the column containing the document contents """ doc_id_keys = frozenset( [id_key] if id_key is not None else Corpus_DB.DOC_IDS) doc_content_keys = frozenset([content_key] if content_key is not None else Corpus_DB.DOC_CONTENTS) field_indexes = [] field_names = [] field_types = [] metadata = [] def ReadCSV(): with open(filename, 'r') as f: reader = UnicodeReader(f) for row in reader: yield row def ReadTSV(): with open(filename, 'r') as f: for line in f: yield line.decode('utf-8', 'ignore').rstrip('\n').split('\t') def ReadSpreadsheet(reader): field_doc_id = None field_doc_content = None for row_index, values in enumerate(reader): if row_index == 0: for index, field in enumerate(values): if field.lower() in doc_id_keys: field_doc_id = index elif field.lower() in doc_content_keys: field_doc_content = index else: field_index = len(field_indexes) field_indexes.append(field_index) field_names.append(field) field_types.append('integer') else: doc_index = row_index - 1 doc_id = 'doc{:d}'.format(doc_index + 1) doc_content = '' field_index = 0 for index, value in enumerate(values): if field_doc_id == index: doc_id = value elif field_doc_content == index: doc_content = value else: metadata.append({ 'doc_index': doc_index, 'field_index': field_index, 'value': value.encode('utf-8') }) # [START] infer field type field_type = field_types[field_index] if field_type == 'integer': try: int(value) except ValueError: field_type = 'double' if field_type == 'double': try: float(value) except ValueError: field_type = 'string' field_types[field_index] = field_type # [END] infer field type field_index += 1 yield { 'doc_index': doc_index, 'doc_id': doc_id, 'doc_content': doc_content.encode('utf-8') } def GetFields(): for field_index in field_indexes: field_name = 
field_names[field_index] field_type = field_types[field_index] yield { 'field_index': field_index, 'field_name': field_name, 'field_type': field_type } reader = ReadCSV() if is_csv else ReadTSV() self.db.corpus.bulk_insert(ReadSpreadsheet(reader)) self.db.fields.bulk_insert(GetFields()) self.db.metadata.bulk_insert(metadata) def ExportToFile(self, filename): """ filename = A tab-delimited file (utf-8 encoded, without header) containing docIDs and document contents """ def WriteFile(rows): m = re.compile(r'\s+') with open(filename, 'w') as f: for row in rows: doc_id = row.doc_id doc_content = m.sub(u' ', row.doc_content.decode('utf-8')) f.write(u'{}\t{}\n'.format(doc_id, doc_content).encode('utf-8')) rows = self.db().select(self.db.corpus.doc_id, self.db.corpus.doc_content, orderby=self.db.corpus.doc_index) WriteFile(rows) def ExportToSpreadsheet(self, filename, is_csv=False): """ filename = A tab- or comma-separated spreadsheet (utf-8 encoded, with header) containing the text corpus and all metadata """ field_names = [ row.field_name for row in self.db().select(self.db.fields.field_name, orderby=self.db.fields.field_index) ] field_count = len(field_names) all_field_names = ['doc_id', 'doc_content'] + field_names def WriteCSV(rows): m = re.compile(r'\s+') with open(filename, 'w') as f: writer = UnicodeWriter(f) writer.writerow(all_field_names) for row in rows: doc_index = row.doc_index doc_id = row.doc_id doc_content = m.sub(u' ', row.doc_content.decode('utf-8')) values = [u''] * field_count for d in self.db( self.db.metadata.doc_index == doc_index).select( self.db.metadata.field_index, self.db.metadata.value): values[d.field_index] = m.sub(u' ', d.value.decode('utf-8')) all_values = [doc_id, doc_content] + values writer.writerow(all_values) def WriteTSV(rows): m = re.compile(r'\s+') with open(filename, 'w') as f: f.write(u'{}\n'.format( u'\t'.join(all_field_names)).encode('utf-8')) for row in rows: doc_index = row.doc_index doc_id = row.doc_id doc_content = 
m.sub(u' ', row.doc_content.decode('utf-8')) values = [u''] * field_count for d in self.db( self.db.metadata.doc_index == doc_index).select( self.db.metadata.field_index, self.db.metadata.value): values[d.field_index] = m.sub(u' ', d.value.decode('utf-8')) all_values = [doc_id, doc_content] + values f.write(u'{}\n'.format( u'\t'.join(all_values)).encode('utf-8')) rows = self.db().select(self.db.corpus.doc_index, self.db.corpus.doc_id, self.db.corpus.doc_content, orderby=self.db.corpus.doc_index) if is_csv: WriteCSV(rows) else: WriteTSV(rows) ################################################################################ def DefineTermStatsTables(self): self.db.define_table('term_texts', Field('term_index', 'integer', required=True, unique=True, default=-1), Field('term_text', 'string', required=True, unique=True), migrate=self.isInit) if self.isInit: self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_text_value ON term_texts (term_text);' ) self.db.define_table('term_freqs', Field('term_index', 'integer', required=True, unique=True, default=-1), Field('value', 'double', required=True), Field('rank', 'integer', required=True), migrate=self.isInit) if self.isInit: self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_freqs_value ON term_freqs (value);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_freqs_rank ON term_freqs (rank);' ) self.db.define_table('term_probs', Field('term_index', 'integer', required=True, unique=True, default=-1), Field('value', 'double', required=True), Field('rank', 'integer', required=True), migrate=self.isInit) if self.isInit: self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_probs_value ON term_probs (value);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_probs_rank ON term_probs (rank);' ) self.db.define_table('term_doc_freqs', Field('term_index', 'integer', required=True, unique=True, default=-1), Field('value', 'double', required=True), Field('rank', 'integer', required=True), migrate=self.isInit) if self.isInit: 
self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_doc_freqs_value ON term_doc_freqs (value);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_doc_freqs_rank ON term_doc_freqs (rank);' ) def DefineTermCoStatsTables(self): self.db.define_table('term_co_freqs', Field('first_term_index', 'integer', required=True, default=-1), Field('second_term_index', 'integer', required=True, default=-1), Field('value', 'double', required=True), Field('rank', 'integer', required=True), migrate=self.isInit) if self.isInit: self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS term_co_freqs_indexes ON term_co_freqs (first_term_index, second_term_index);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_co_freqs_value ON term_co_freqs (value);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_co_freqs_rank ON term_co_freqs (rank);' ) self.db.define_table('term_co_probs', Field('first_term_index', 'integer', required=True, default=-1), Field('second_term_index', 'integer', required=True, default=-1), Field('value', 'double', required=True), Field('rank', 'integer', required=True), migrate=self.isInit) if self.isInit: self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS term_co_probs_indexes ON term_co_probs (first_term_index, second_term_index);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_co_probs_value ON term_co_probs (value);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_co_probs_rank ON term_co_probs (rank);' ) self.db.define_table('term_g2', Field('first_term_index', 'integer', required=True, default=-1), Field('second_term_index', 'integer', required=True, default=-1), Field('value', 'double', required=True), Field('rank', 'integer', required=True), migrate=self.isInit) if self.isInit: self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS term_g2_indexes ON term_g2 (first_term_index, second_term_index);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS term_g2_value ON term_g2 (value);') self.db.executesql( 'CREATE INDEX IF NOT 
EXISTS term_g2_rank ON term_g2 (rank);') def DefineSentenceCoStatsTables(self): self.db.define_table('sentences_co_freqs', Field('first_term_index', 'integer', required=True, default=-1), Field('second_term_index', 'integer', required=True, default=-1), Field('value', 'double', required=True), Field('rank', 'integer', required=True), migrate=self.isInit) if self.isInit: self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS sentences_co_freqs_indexes ON sentences_co_freqs (first_term_index, second_term_index);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS sentences_co_freqs_value ON sentences_co_freqs (value);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS sentences_co_freqs_rank ON sentences_co_freqs (rank);' ) self.db.define_table('sentences_co_probs', Field('first_term_index', 'integer', required=True, default=-1), Field('second_term_index', 'integer', required=True, default=-1), Field('value', 'double', required=True), Field('rank', 'integer', required=True), migrate=self.isInit) if self.isInit: self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS sentences_co_probs_indexes ON sentences_co_probs (first_term_index, second_term_index);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS sentences_co_probs_value ON sentences_co_probs (value);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS sentences_co_probs_rank ON sentences_co_probs (rank);' ) self.db.define_table('sentences_g2', Field('first_term_index', 'integer', required=True, default=-1), Field('second_term_index', 'integer', required=True, default=-1), Field('value', 'double', required=True), Field('rank', 'integer', required=True), migrate=self.isInit) if self.isInit: self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS sentences_g2_indexes ON sentences_g2 (first_term_index, second_term_index);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS sentences_g2_value ON sentences_g2 (value);' ) self.db.executesql( 'CREATE INDEX IF NOT EXISTS sentences_g2_rank ON sentences_g2 (rank);' ) 
################################################################################

def Reset(self):
    """Delete every row from the term and co-occurrence statistics tables.

    The tables themselves are kept; their rows are removed with raw
    ``DELETE FROM`` statements issued through ``self.db.executesql`` in a
    fixed order.
    """
    stats_tables = (
        'term_texts',
        'term_freqs',
        'term_probs',
        'term_doc_freqs',
        'term_co_freqs',
        'term_co_probs',
        'term_g2',
        'sentences_co_freqs',
        'sentences_co_probs',
        'sentences_g2',
    )
    for table_name in stats_tables:
        self.db.executesql('DELETE FROM {};'.format(table_name))

################################################################################

def DefineTemporaryTable(self):
    """Declare the ``vocab`` and ``vocab_text`` tables on ``self.db``.

    Both tables are declared with ``migrate`` set to ``self.isInit``.
    """
    vocab_fields = [
        Field('term_index', 'integer', required=True, unique=True, default=-1),
        Field('term_text', 'string', required=True),
    ]
    self.db.define_table('vocab', *vocab_fields, migrate=self.isInit)

    # NOTE(review): default=-1 on a 'string' field looks copied from the
    # integer index fields -- preserved as is, confirm it is intended.
    vocab_text_fields = [
        Field('term_text', 'string', required=True, unique=True, default=-1),
    ]
    self.db.define_table('vocab_text', *vocab_text_fields, migrate=self.isInit)
DEMO_MODE = False response.google_analytics_id = None if not request.env.web2py_runtime_gae: request.data_folder = None if os.path.isdir(os.path.join('..','..','data')): request.data_folder = os.path.join('..','..','data') elif os.path.isdir(os.path.join('..','..','databases')): request.data_folder = os.path.join('..','..','databases') elif os.path.isdir(os.path.join('..','data')): request.data_folder = os.path.join('..','data') elif os.path.isdir(os.path.join('..','databases')): request.data_folder = os.path.join('..','databases') ename="sqlite" db = DAL('sqlite://storage.sqlite', migrate=False, fake_migrate=False, folder=request.data_folder) session_db = DAL('sqlite://session.sqlite', folder=request.data_folder) session.connect(request, response, db = session_db) reload(sys) sys.setdefaultencoding("utf-8")#@UndefinedVariable else: ename="google_datastore" #db = DAL('google:datastore://storage', migrate=False, fake_migrate=False) db = DAL('google:datastore', adapter_args={'ndb_settings':None, 'use_ndb':False}) session.connect(request, response, db = db) #from gluon.contrib.memdb import MEMDB #from google.appengine.api.memcache import Client #session.connect(request, response, db = MEMDB(Client())) response.generic_patterns = ['*'] if request.is_local else [] try:
class BOW_DB():
    """Context-managed wrapper around the ``bow.db`` SQLite database.

    Entering the context declares the options table, the per-term statistics
    tables, the term and sentence co-occurrence tables and the temporary
    vocabulary tables.  Leaving the context commits the connection.
    """

    FILENAME = 'bow.db'
    CONNECTION = 'sqlite://{}'.format(FILENAME)
    DEFAULT_OPTIONS = {
        'max_freq_count' : 4000,        # Maximum number of terms to store
        'max_co_freq_count' : 100000    # Maximum number of term pairs to store
    }

    # Tables sharing the (term_index, value, rank) layout.
    _TERM_STATS_TABLES = ('term_freqs', 'term_probs', 'term_doc_freqs')
    # Tables sharing the (first_term_index, second_term_index, value, rank) layout.
    _TERM_CO_STATS_TABLES = ('term_co_freqs', 'term_co_probs', 'term_g2')
    _SENTENCE_CO_STATS_TABLES = ('sentences_co_freqs', 'sentences_co_probs', 'sentences_g2')

    def __init__(self, path = None, isInit = False):
        """Open the database connection.

        path   -- passed to DAL as ``folder`` when it is not None.
        isInit -- when true, tables are declared eagerly with migrations
                  enabled and the SQL indexes are created; when false the
                  tables are lazy and migrations are disabled.
        """
        self.isInit = isInit
        dal_options = {
            'lazy_tables': not self.isInit,
            'migrate_enabled': self.isInit,
        }
        if path is not None:
            dal_options['folder'] = path
        self.db = DAL(BOW_DB.CONNECTION, **dal_options)

    def __enter__(self):
        """Declare every table and return this wrapper."""
        self.DefineOptionsTable()
        self.DefineTermStatsTables()
        self.DefineTermCoStatsTables()
        self.DefineSentenceCoStatsTables()
        self.DefineTemporaryTable()
        return self

    def __exit__(self, type, value, traceback):
        """Commit the connection.

        The commit is unconditional (the exception arguments are ignored) and
        nothing is returned, so an exception raised in the with-body propagates.
        """
        self.db.commit()

################################################################################

    def DefineOptionsTable(self):
        """Declare the ``options`` table and seed it from DEFAULT_OPTIONS.

        Existing values are overwritten only when ``self.isInit`` is true.
        """
        self.db.define_table(
            'options',
            Field('key', 'string', required=True, unique=True),
            Field('value', 'string', required=True),
            migrate=self.isInit
        )
        # items() rather than the Python-2-only iteritems(), so the seeding
        # also works on Python 3.
        for key, value in BOW_DB.DEFAULT_OPTIONS.items():
            self.SetOption(key, value, overwrite=self.isInit)

    def SetOption(self, key, value, overwrite = True):
        """Store ``value`` under ``key`` and commit.

        An existing row is updated only when ``overwrite`` is true; a missing
        row is always inserted.
        """
        where = self.db.options.key == key
        if self.db(where).count() > 0:
            if overwrite:
                self.db(where).update(value=value)
        else:
            self.db.options.insert(key=key, value=value)
        self.db.commit()

    def GetOption(self, key):
        """Return the stored value for ``key``, or None when there is no such row."""
        where = self.db.options.key == key
        row = self.db(where).select(self.db.options.value).first()
        return row.value if row else None

################################################################################

    def _define_term_stats_table(self, table):
        """Declare one (term_index, value, rank) table and, on init, its indexes."""
        self.db.define_table(
            table,
            Field('term_index', 'integer', required=True, unique=True, default=-1),
            Field('value', 'double', required=True),
            Field('rank', 'integer', required=True),
            migrate=self.isInit
        )
        if self.isInit:
            self.db.executesql('CREATE INDEX IF NOT EXISTS {0}_value ON {0} (value);'.format(table))
            self.db.executesql('CREATE INDEX IF NOT EXISTS {0}_rank ON {0} (rank);'.format(table))

    def _define_pair_stats_table(self, table):
        """Declare one (first, second, value, rank) table and, on init, its indexes."""
        self.db.define_table(
            table,
            Field('first_term_index', 'integer', required=True, default=-1),
            Field('second_term_index', 'integer', required=True, default=-1),
            Field('value', 'double', required=True),
            Field('rank', 'integer', required=True),
            migrate=self.isInit
        )
        if self.isInit:
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS {0}_indexes ON {0} (first_term_index, second_term_index);'.format(table))
            self.db.executesql('CREATE INDEX IF NOT EXISTS {0}_value ON {0} (value);'.format(table))
            self.db.executesql('CREATE INDEX IF NOT EXISTS {0}_rank ON {0} (rank);'.format(table))

    def DefineTermStatsTables(self):
        """Declare ``term_texts`` and the three per-term statistics tables."""
        self.db.define_table(
            'term_texts',
            Field('term_index', 'integer', required=True, unique=True, default=-1),
            Field('term_text', 'string', required=True, unique=True),
            migrate=self.isInit
        )
        if self.isInit:
            self.db.executesql('CREATE INDEX IF NOT EXISTS term_text_value ON term_texts (term_text);')
        for table in BOW_DB._TERM_STATS_TABLES:
            self._define_term_stats_table(table)

    def DefineTermCoStatsTables(self):
        """Declare the three term co-occurrence tables."""
        for table in BOW_DB._TERM_CO_STATS_TABLES:
            self._define_pair_stats_table(table)

    def DefineSentenceCoStatsTables(self):
        """Declare the three sentence-level co-occurrence tables."""
        for table in BOW_DB._SENTENCE_CO_STATS_TABLES:
            self._define_pair_stats_table(table)

################################################################################

    def Reset(self):
        """Delete every row from the statistics tables.

        The ``options``, ``vocab`` and ``vocab_text`` tables are not touched.
        """
        tables = (
            ('term_texts',)
            + BOW_DB._TERM_STATS_TABLES
            + BOW_DB._TERM_CO_STATS_TABLES
            + BOW_DB._SENTENCE_CO_STATS_TABLES
        )
        for table in tables:
            self.db.executesql('DELETE FROM {};'.format(table))

################################################################################

    def DefineTemporaryTable(self):
        """Declare the ``vocab`` and ``vocab_text`` tables."""
        self.db.define_table(
            'vocab',
            Field('term_index', 'integer', required=True, unique=True, default=-1),
            Field('term_text', 'string', required=True),
            migrate=self.isInit
        )
        # NOTE(review): default=-1 on a 'string' field looks copied from the
        # integer index fields -- preserved as is, confirm it is intended.
        self.db.define_table(
            'vocab_text',
            Field('term_text', 'string', required=True, unique=True, default=-1),
            migrate=self.isInit
        )
# Start with no cache and no translator, both as module globals and on `current`.
current.cache = cache = None
current.T = T = None


def initVars():
    """Create fresh request/response/session/cache objects for the test run.

    Each object is bound both to the module-level global of the same name and
    to the matching attribute on `current`.  The translator `T` is set to
    `m__T__`, which is defined outside this fragment.
    """
    global current, request, response, session, cache, T
    current.request = request = Request()
    current.response = response = Response()
    current.session = session = Session()
    # The cache is built from the request object created just above.
    current.cache = cache = Cache(request)
    current.T = T = m__T__


initVars()
# NOTE(review): deleteDB is defined elsewhere; presumably it removes the
# database at DB_PATH before the connection below is opened -- confirm.
deleteDB()
db = DAL('sqlite://'+DB_PATH)
import gluon.tools as gt
from mock import Mock
# Replace gluon.tools.URL with a Mock that delegates to m__URL__.
gt.URL=Mock(side_effect=m__URL__)
crud = gt.Crud(db)
#
# Some global web2py imports
#
# Already done
Copyright © 2011-2015, Csaba Kappel License: LGPLv3 http://www.nervatura.com/nerva2py/default/licenses """ if 0: global response response = globals.Response() global request request = globals.Request() from gluon.globals import Session global session session = Session() from gluon.sql import DAL global db db = DAL() import gluon.languages.translator as T from db import DEMO_MODE from gluon.http import redirect from gluon.sqlhtml import DIV, SPAN, A, INPUT, FORM from gluon.html import BR, HR, SELECT, OPTION, P, IMG, TABLE, TR, TD from gluon.validators import IS_NOT_EMPTY from gluon.html import URL from gluon.storage import Storage import os from gluon.sqlhtml import SQLFORM from gluon.validators import IS_IN_DB, IS_IN_SET, IS_EMPTY_OR from gluon.html import TBODY, THEAD, TH, TEXTAREA, CODE from gluon.sql import Field
""" This file is part of the Nervatura Framework http://www.nervatura.com Copyright © 2011-2015, Csaba Kappel License: LGPLv3 http://www.nervatura.com/nerva2py/default/licenses """ if 0: global response; response = globals.Response() global request; request = globals.Request() from gluon.globals import Session global session; session = Session() from gluon.sql import DAL global db; db = DAL() import gluon.languages.translator as T from db import DEMO_MODE from gluon.http import redirect from gluon.sqlhtml import DIV, SPAN, A, INPUT, FORM from gluon.html import BR, HR, SELECT, OPTION, P, IMG, TABLE, TR, TD from gluon.validators import IS_NOT_EMPTY from gluon.html import URL from gluon.storage import Storage import os from gluon.sqlhtml import SQLFORM from gluon.validators import IS_IN_DB, IS_IN_SET, IS_EMPTY_OR from gluon.html import TBODY, THEAD, TH, TEXTAREA, CODE from gluon.sql import Field
class LDA_DB:
    """Context-managed wrapper around the ``lda.db`` SQLite database.

    Entering the context declares the options, dimension, matrix and
    statistics tables and then, when ``isReset`` was requested, empties the
    data tables.  Leaving the context commits the connection.
    """

    FILENAME = "lda.db"
    CONNECTION = "sqlite://{}".format(FILENAME)
    DEFAULT_OPTIONS = {"max_co_topic_count": 40000}

    def __init__(self, path=None, isInit=False, isReset=False):
        """Open the database connection.

        path    -- passed to DAL as ``folder`` when it is not None.
        isInit  -- when true, tables are declared eagerly with ``migrate``
                   enabled and the SQL indexes are created.
        isReset -- when true, ``__enter__`` empties the data tables.
        """
        self.isInit = isInit
        self.isReset = isReset
        dal_options = {"lazy_tables": not self.isInit, "migrate": self.isInit}
        if path is not None:
            dal_options["folder"] = path
        self.db = DAL(LDA_DB.CONNECTION, **dal_options)

    def __enter__(self):
        """Declare every table, optionally reset the data, and return this wrapper."""
        self.DefineOptionsTable()
        self.DefineDimensionTables()
        self.DefineMatrixTables()
        self.DefineStatsTables()
        # Reset issues raw DELETE statements, so it runs only after the tables
        # have been declared; with isInit on a fresh database they would not
        # exist any earlier.
        if self.isReset:
            self.Reset()
        return self

    def __exit__(self, type, value, traceback):
        """Commit the connection.

        The commit is unconditional (the exception arguments are ignored) and
        nothing is returned, so an exception raised in the with-body propagates.
        """
        self.db.commit()

    ################################################################################

    def _pair_fields(self, first_name, second_name):
        """Return fresh Field objects for a (first, second, value, rank) table."""
        return [
            Field(first_name, "integer", required=True, default=-1),
            Field(second_name, "integer", required=True, default=-1),
            Field("value", "double", required=True),
            Field("rank", "integer", required=True),
        ]

    def _create_indexes(self, *statements):
        """Execute the given index statements, but only when initialising."""
        if self.isInit:
            for statement in statements:
                self.db.executesql(statement)

    ################################################################################

    def DefineOptionsTable(self):
        """Declare the ``options`` table; seed DEFAULT_OPTIONS when it is empty."""
        self.db.define_table(
            "options",
            Field("key", "string", required=True, unique=True),
            Field("value", "string", required=True),
            migrate=self.isInit,
        )
        if self.db(self.db.options).count() == 0:
            # items() rather than the Python-2-only iteritems(), so the
            # seeding also works on Python 3.
            for key, value in LDA_DB.DEFAULT_OPTIONS.items():
                self.db.options.insert(key=key, value=value)

    def SetOption(self, key, value):
        """Update the row stored under ``key``, or insert it when missing."""
        row = self.db(self.db.options.key == key).select().first()
        if row:
            row.update_record(value=value)
        else:
            self.db.options.insert(key=key, value=value)

    def GetOption(self, key):
        """Return the stored value for ``key``, or None when there is no such row."""
        row = self.db(self.db.options.key == key).select(self.db.options.value).first()
        return row.value if row else None

    ################################################################################

    def DefineDimensionTables(self):
        """Declare the ``terms``, ``docs`` and ``topics`` tables and their indexes."""
        self.db.define_table(
            "terms",
            Field("term_index", "integer", required=True, unique=True, default=-1),
            Field("term_text", "string", required=True, unique=True),
            Field("term_freq", "double", required=True),
            Field("rank", "integer", required=True),
            migrate=self.isInit,
        )
        self._create_indexes(
            "CREATE UNIQUE INDEX IF NOT EXISTS terms_index ON terms (term_index);",
            "CREATE UNIQUE INDEX IF NOT EXISTS terms_text ON terms (term_text);",
            "CREATE INDEX IF NOT EXISTS terms_freq ON terms (term_freq);",
            "CREATE INDEX IF NOT EXISTS terms_rank ON terms (rank);",
        )

        self.db.define_table(
            "docs",
            Field("doc_index", "integer", required=True, unique=True, default=-1),
            Field("doc_id", "string", required=True, unique=True),
            Field("doc_freq", "double", required=True),
            Field("rank", "integer", required=True),
            migrate=self.isInit,
        )
        self._create_indexes(
            "CREATE UNIQUE INDEX IF NOT EXISTS docs_index ON docs (doc_index);",
            "CREATE UNIQUE INDEX IF NOT EXISTS docs_id ON docs (doc_id);",
            "CREATE INDEX IF NOT EXISTS docs_freq ON docs (doc_freq);",
            "CREATE INDEX IF NOT EXISTS docs_rank ON docs (rank);",
        )

        self.db.define_table(
            "topics",
            Field("topic_index", "integer", required=True, unique=True, default=-1),
            Field("topic_freq", "double", required=True),
            Field("topic_label", "string", required=True),
            Field("topic_desc", "string", required=True),
            Field("top_terms", "list:integer", required=True),
            Field("top_docs", "list:integer", required=True),
            Field("rank", "integer", required=True),
            migrate=self.isInit,
        )
        self._create_indexes(
            "CREATE UNIQUE INDEX IF NOT EXISTS topics_index ON topics (topic_index);",
            "CREATE INDEX IF NOT EXISTS topics_freq ON topics (topic_freq);",
            "CREATE INDEX IF NOT EXISTS topics_rank ON topics (rank);",
        )

    def DefineMatrixTables(self):
        """Declare the term-topic and doc-topic matrix tables and their indexes."""
        self.db.define_table(
            "term_topic_matrix",
            *self._pair_fields("term_index", "topic_index"),
            migrate=self.isInit
        )
        self._create_indexes(
            "CREATE UNIQUE INDEX IF NOT EXISTS term_topic_indexes ON term_topic_matrix (term_index, topic_index);",
            "CREATE INDEX IF NOT EXISTS term_topic_value ON term_topic_matrix (value);",
            "CREATE INDEX IF NOT EXISTS term_topic_rank ON term_topic_matrix (rank);",
            "CREATE INDEX IF NOT EXISTS term_topic_termindex ON term_topic_matrix (term_index);",
            "CREATE INDEX IF NOT EXISTS term_topic_topicindex ON term_topic_matrix (topic_index);",
        )

        self.db.define_table(
            "doc_topic_matrix",
            *self._pair_fields("doc_index", "topic_index"),
            migrate=self.isInit
        )
        self._create_indexes(
            "CREATE UNIQUE INDEX IF NOT EXISTS doc_topic_indexes ON doc_topic_matrix (doc_index, topic_index);",
            "CREATE INDEX IF NOT EXISTS doc_topic_value ON doc_topic_matrix (value);",
            "CREATE INDEX IF NOT EXISTS doc_topic_rank ON doc_topic_matrix (rank);",
            "CREATE INDEX IF NOT EXISTS doc_topic_docindex ON doc_topic_matrix (doc_index);",
            "CREATE INDEX IF NOT EXISTS doc_topic_topicindex ON doc_topic_matrix (topic_index);",
        )

    def DefineStatsTables(self):
        """Declare the topic co-occurrence and covariance tables and their indexes."""
        self.db.define_table(
            "topic_cooccurrences",
            *self._pair_fields("first_topic_index", "second_topic_index"),
            migrate=self.isInit
        )
        self._create_indexes(
            "CREATE UNIQUE INDEX IF NOT EXISTS topic_cooccurrences_indexes ON topic_cooccurrences (first_topic_index, second_topic_index);",
            "CREATE INDEX IF NOT EXISTS topic_cooccurrences_value ON topic_cooccurrences (value);",
            "CREATE INDEX IF NOT EXISTS topic_cooccurrences_rank ON topic_cooccurrences (rank);",
        )

        self.db.define_table(
            "topic_covariance",
            *self._pair_fields("first_topic_index", "second_topic_index"),
            migrate=self.isInit
        )
        self._create_indexes(
            "CREATE UNIQUE INDEX IF NOT EXISTS topic_covariance_indexes ON topic_covariance (first_topic_index, second_topic_index);",
            "CREATE INDEX IF NOT EXISTS topic_covariance_value ON topic_covariance (value);",
            "CREATE INDEX IF NOT EXISTS topic_covariance_rank ON topic_covariance (rank);",
        )

    ################################################################################

    def Reset(self):
        """Delete every row from the data tables; ``options`` is not touched."""
        for table in (
            "terms",
            "docs",
            "topics",
            "term_topic_matrix",
            "doc_topic_matrix",
            "topic_cooccurrences",
            "topic_covariance",
        ):
            self.db.executesql("DELETE FROM {};".format(table))
response.google_analytics_id = None

if not request.env.web2py_runtime_gae:
    # Pick the first candidate directory that exists; the attribute stays
    # None when none of them is present.
    request.data_folder = next(
        (
            folder
            for folder in (
                os.path.join('..', '..', 'data'),
                os.path.join('..', '..', 'databases'),
                os.path.join('..', 'data'),
                os.path.join('..', 'databases'),
            )
            if os.path.isdir(folder)
        ),
        None
    )
    ename = "sqlite"
    # Application data and session storage live in separate SQLite files
    # inside the same folder.
    db = DAL(
        'sqlite://storage.sqlite',
        migrate=False,
        fake_migrate=False,
        folder=request.data_folder
    )
    session_db = DAL('sqlite://session.sqlite', folder=request.data_folder)
    session.connect(request, response, db=session_db)
    # Python 2 only: reload(sys) restores sys.setdefaultencoding, which
    # site.py removes at interpreter start-up.
    reload(sys)
    sys.setdefaultencoding("utf-8")  #@UndefinedVariable
else:
    ename = "google_datastore"
    #db = DAL('google:datastore://storage', migrate=False, fake_migrate=False)
    db = DAL(
        'google:datastore',
        adapter_args={'ndb_settings': None, 'use_ndb': False}
    )
    # On App Engine the sessions share the datastore connection.
    session.connect(request, response, db=db)
    #from gluon.contrib.memdb import MEMDB
class LDA_DB():
    """Context-managed wrapper around the ``lda.db`` SQLite database.

    Entering the context declares the options, dimension, matrix and
    statistics tables and then, when ``isReset`` was requested, empties the
    data tables.  Leaving the context commits the connection.
    """

    FILENAME = 'lda.db'
    CONNECTION = 'sqlite://{}'.format(FILENAME)
    DEFAULT_OPTIONS = {
        'max_co_topic_count' : 40000
    }

    def __init__(self, path = None, isInit = False, isReset = False):
        """Open the database connection.

        path    -- passed to DAL as ``folder`` when it is not None.
        isInit  -- when true, tables are declared eagerly with ``migrate``
                   enabled and the SQL indexes are created.
        isReset -- when true, ``__enter__`` empties the data tables.
        """
        self.isInit = isInit
        self.isReset = isReset
        dal_options = {'lazy_tables': not self.isInit, 'migrate': self.isInit}
        if path is not None:
            dal_options['folder'] = path
        self.db = DAL(LDA_DB.CONNECTION, **dal_options)

    def __enter__(self):
        """Declare every table, optionally reset the data, and return this wrapper."""
        self.DefineOptionsTable()
        self.DefineDimensionTables()
        self.DefineMatrixTables()
        self.DefineStatsTables()
        # Reset issues raw DELETE statements, so it runs only after the tables
        # have been declared; with isInit on a fresh database they would not
        # exist any earlier.
        if self.isReset:
            self.Reset()
        return self

    def __exit__(self, type, value, traceback):
        """Commit the connection.

        The commit is unconditional (the exception arguments are ignored) and
        nothing is returned, so an exception raised in the with-body propagates.
        """
        self.db.commit()

################################################################################

    def _pair_fields(self, first_name, second_name):
        """Return fresh Field objects for a (first, second, value, rank) table."""
        return [
            Field(first_name, 'integer', required=True, default=-1),
            Field(second_name, 'integer', required=True, default=-1),
            Field('value', 'double', required=True),
            Field('rank', 'integer', required=True),
        ]

    def _create_indexes(self, *statements):
        """Execute the given index statements, but only when initialising."""
        if self.isInit:
            for statement in statements:
                self.db.executesql(statement)

################################################################################

    def DefineOptionsTable(self):
        """Declare the ``options`` table; seed DEFAULT_OPTIONS when it is empty."""
        self.db.define_table(
            'options',
            Field('key', 'string', required=True, unique=True),
            Field('value', 'string', required=True),
            migrate=self.isInit
        )
        if self.db(self.db.options).count() == 0:
            # items() rather than the Python-2-only iteritems(), so the
            # seeding also works on Python 3.
            for key, value in LDA_DB.DEFAULT_OPTIONS.items():
                self.db.options.insert(key=key, value=value)

    def SetOption(self, key, value):
        """Update the row stored under ``key``, or insert it when missing."""
        row = self.db(self.db.options.key == key).select().first()
        if row:
            row.update_record(value=value)
        else:
            self.db.options.insert(key=key, value=value)

    def GetOption(self, key):
        """Return the stored value for ``key``, or None when there is no such row."""
        row = self.db(self.db.options.key == key).select(self.db.options.value).first()
        return row.value if row else None

################################################################################

    def DefineDimensionTables(self):
        """Declare the ``terms``, ``docs`` and ``topics`` tables and their indexes."""
        self.db.define_table(
            'terms',
            Field('term_index', 'integer', required=True, unique=True, default=-1),
            Field('term_text', 'string', required=True, unique=True),
            Field('term_freq', 'double', required=True),
            Field('rank', 'integer', required=True),
            migrate=self.isInit
        )
        self._create_indexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS terms_index ON terms (term_index);',
            'CREATE UNIQUE INDEX IF NOT EXISTS terms_text ON terms (term_text);',
            'CREATE INDEX IF NOT EXISTS terms_freq ON terms (term_freq);',
            'CREATE INDEX IF NOT EXISTS terms_rank ON terms (rank);'
        )

        self.db.define_table(
            'docs',
            Field('doc_index', 'integer', required=True, unique=True, default=-1),
            Field('doc_id', 'string', required=True, unique=True),
            Field('doc_freq', 'double', required=True),
            Field('rank', 'integer', required=True),
            migrate=self.isInit
        )
        self._create_indexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS docs_index ON docs (doc_index);',
            'CREATE UNIQUE INDEX IF NOT EXISTS docs_id ON docs (doc_id);',
            'CREATE INDEX IF NOT EXISTS docs_freq ON docs (doc_freq);',
            'CREATE INDEX IF NOT EXISTS docs_rank ON docs (rank);'
        )

        self.db.define_table(
            'topics',
            Field('topic_index', 'integer', required=True, unique=True, default=-1),
            Field('topic_freq', 'double', required=True),
            Field('topic_label', 'string', required=True),
            Field('topic_desc', 'string', required=True),
            Field('top_terms', 'list:integer', required=True),
            Field('top_docs', 'list:integer', required=True),
            Field('rank', 'integer', required=True),
            migrate=self.isInit
        )
        self._create_indexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS topics_index ON topics (topic_index);',
            'CREATE INDEX IF NOT EXISTS topics_freq ON topics (topic_freq);',
            'CREATE INDEX IF NOT EXISTS topics_rank ON topics (rank);'
        )

    def DefineMatrixTables(self):
        """Declare the term-topic and doc-topic matrix tables and their indexes."""
        self.db.define_table(
            'term_topic_matrix',
            *self._pair_fields('term_index', 'topic_index'),
            migrate=self.isInit
        )
        self._create_indexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS term_topic_indexes ON term_topic_matrix (term_index, topic_index);',
            'CREATE INDEX IF NOT EXISTS term_topic_value ON term_topic_matrix (value);',
            'CREATE INDEX IF NOT EXISTS term_topic_rank ON term_topic_matrix (rank);',
            'CREATE INDEX IF NOT EXISTS term_topic_termindex ON term_topic_matrix (term_index);',
            'CREATE INDEX IF NOT EXISTS term_topic_topicindex ON term_topic_matrix (topic_index);'
        )

        self.db.define_table(
            'doc_topic_matrix',
            *self._pair_fields('doc_index', 'topic_index'),
            migrate=self.isInit
        )
        self._create_indexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS doc_topic_indexes ON doc_topic_matrix (doc_index, topic_index);',
            'CREATE INDEX IF NOT EXISTS doc_topic_value ON doc_topic_matrix (value);',
            'CREATE INDEX IF NOT EXISTS doc_topic_rank ON doc_topic_matrix (rank);',
            'CREATE INDEX IF NOT EXISTS doc_topic_docindex ON doc_topic_matrix (doc_index);',
            'CREATE INDEX IF NOT EXISTS doc_topic_topicindex ON doc_topic_matrix (topic_index);'
        )

    def DefineStatsTables(self):
        """Declare the topic co-occurrence and covariance tables and their indexes."""
        self.db.define_table(
            'topic_cooccurrences',
            *self._pair_fields('first_topic_index', 'second_topic_index'),
            migrate=self.isInit
        )
        self._create_indexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS topic_cooccurrences_indexes ON topic_cooccurrences (first_topic_index, second_topic_index);',
            'CREATE INDEX IF NOT EXISTS topic_cooccurrences_value ON topic_cooccurrences (value);',
            'CREATE INDEX IF NOT EXISTS topic_cooccurrences_rank ON topic_cooccurrences (rank);'
        )

        self.db.define_table(
            'topic_covariance',
            *self._pair_fields('first_topic_index', 'second_topic_index'),
            migrate=self.isInit
        )
        self._create_indexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS topic_covariance_indexes ON topic_covariance (first_topic_index, second_topic_index);',
            'CREATE INDEX IF NOT EXISTS topic_covariance_value ON topic_covariance (value);',
            'CREATE INDEX IF NOT EXISTS topic_covariance_rank ON topic_covariance (rank);'
        )

################################################################################

    def Reset(self):
        """Delete every row from the data tables; ``options`` is not touched."""
        for table in (
            'terms',
            'docs',
            'topics',
            'term_topic_matrix',
            'doc_topic_matrix',
            'topic_cooccurrences',
            'topic_covariance',
        ):
            self.db.executesql('DELETE FROM {};'.format(table))
class Corpus_DB():
    """
    Text corpus and per-document metadata stored in a SQLite file through the DAL.

    Intended to be used as a context manager: entering defines the 'options',
    'models', 'corpus', 'fields' and 'metadata' tables; leaving rebuilds the
    'corpus_search' full-text table (only when isInit is set) and commits.
    __exit__ does not inspect its arguments, so this happens whether or not the
    with-body raised.

    NOTE: the import/export helpers rely on Python 2 string semantics (lines read
    from text-mode files are byte strings that get .decode()'d) and on
    dict.iteritems().
    """

    FILENAME = 'corpus.db'
    CONNECTION = 'sqlite://{}'.format(FILENAME)

    # Header names (lower-case) recognized as the document-id / document-content columns
    DOC_IDS = ['doc_id', 'docid']
    DOC_CONTENTS = ['doc_content', 'doccontent', 'docbody', 'doc_body']

    # Built-in model entry that is always reported by GetModel() / GetModels()
    MODEL_KEY = 'corpus'
    MODEL_DESC = 'Text Corpus'
    MODEL_ENTRY = { 'model_key' : MODEL_KEY, 'model_desc' : MODEL_DESC }

    LINEBREAKS_TABS = re.compile(r'[\t\r\n\f]')

    DEFAULT_OPTIONS = {
        'token_regex' : r'\w{3,}',   # Tokenize a corpus into a bag-of-words language model
        'min_freq' : 5,              # Number of times a term must appear in the corpus
        'min_doc_freq' : 3           # Number of documents in which a terms must appear
    }

    def __init__(self, path = None, isInit = False):
        """
        path = Folder handed to the DAL as 'folder' (omitted when None)
        isInit = Enables migrations and index creation, and disables lazy tables
        """
        self.isInit = isInit
        dal_options = { 'lazy_tables' : not self.isInit, 'migrate_enabled' : self.isInit }
        if path is not None:
            dal_options['folder'] = path
        self.db = DAL(Corpus_DB.CONNECTION, **dal_options)

    def __enter__(self):
        self.DefineOptionsTable()
        self.DefineModelsTable()
        self.DefineCorpusTable()
        self.DefineMetadataTables()
        return self

    def __exit__(self, type, value, traceback):
        self.DefineCorpusTextSearch()
        self.db.commit()

################################################################################

    def DefineOptionsTable(self):
        """Define the key/value 'options' table and seed it with DEFAULT_OPTIONS."""
        self.db.define_table( 'options',
            Field( 'key'  , 'string', required = True, unique = True ),
            Field( 'value', 'string', required = True ),
            migrate = self.isInit
        )
        # Existing values are only overwritten by an initializing instance
        for key, value in Corpus_DB.DEFAULT_OPTIONS.iteritems():
            self.SetOption( key, value, overwrite = self.isInit )

    def SetOption(self, key, value, overwrite = True):
        """
        Insert the option if it is missing; update it only when overwrite is set.
        Commits in every case.
        """
        where = self.db.options.key == key
        if self.db( where ).count() > 0:
            if overwrite:
                self.db( where ).update( value = value )
        else:
            self.db.options.insert( key = key, value = value )
        self.db.commit()

    def GetOption(self, key):
        """Return the stored value for key, or None when there is no such row."""
        where = self.db.options.key == key
        keyValue = self.db( where ).select( self.db.options.value ).first()
        if keyValue:
            return keyValue.value
        else:
            return None

    def DefineModelsTable(self):
        """Define the 'models' table (model_key -> model_desc)."""
        self.db.define_table( 'models',
            Field( 'model_key' , 'string', required = True, unique = True ),
            Field( 'model_desc', 'string', required = True ),
            migrate = self.isInit
        )

    def AddModel(self, model_key, model_desc):
        """Insert the model, or update its description if the key exists; commits."""
        where = self.db.models.model_key == model_key
        if self.db( where ).count() > 0:
            self.db( where ).update( model_desc = model_desc )
        else:
            self.db.models.insert( model_key = model_key, model_desc = model_desc )
        self.db.commit()

    def GetModel(self, model_key):
        """
        Return { 'model_key', 'model_desc' } for the key, or None if unknown.
        The built-in corpus key is answered from MODEL_ENTRY without a query.
        """
        if model_key == Corpus_DB.MODEL_KEY:
            return Corpus_DB.MODEL_ENTRY
        where = self.db.models.model_key == model_key
        keyValue = self.db( where ).select( self.db.models.ALL ).first()
        if keyValue:
            return { 'model_key' : keyValue.model_key, 'model_desc' : keyValue.model_desc }
        else:
            return None

    def GetModels(self):
        """Return MODEL_ENTRY followed by every row of the 'models' table."""
        rows = self.db( self.db.models ).select( self.db.models.model_key, self.db.models.model_desc ).as_list()
        return [ Corpus_DB.MODEL_ENTRY ] + rows

################################################################################

    def DefineCorpusTable(self):
        """Define the 'corpus' table; create its unique indexes when isInit is set."""
        self.db.define_table( 'corpus',
            Field( 'doc_index'  , 'integer', required = True, unique = True, default = -1 ),
            Field( 'doc_id'     , 'string' , required = True, unique = True ),
            Field( 'doc_content', 'text'   , required = True ),
            migrate = self.isInit
        )
        if self.isInit:
            self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS corpus_doc_index ON corpus (doc_index);' )
            self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS corpus_doc_id ON corpus (doc_id);' )

    def DefineMetadataTables(self):
        """Define the 'fields' and 'metadata' tables; create indexes when isInit is set."""
        self.db.define_table( 'fields',
            Field( 'field_index', 'integer', required = True, unique = True, default = -1 ),
            Field( 'field_name' , 'string' , required = True, unique = True ),
            Field( 'field_type' , 'string' , required = True ),
            migrate = self.isInit
        )
        if self.isInit:
            self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS field_index ON fields (field_index);' )
            self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS field_name ON fields (field_name);' )
            self.db.executesql( 'CREATE INDEX IF NOT EXISTS field_type ON fields (field_type);' )

        self.db.define_table( 'metadata',
            Field( 'doc_index'  , 'integer', required = True, default = -1 ),
            Field( 'field_index', 'integer', required = True, default = -1 ),
            Field( 'value'      , 'text'   , required = True ),
            migrate = self.isInit
        )
        if self.isInit:
            self.db.executesql( 'CREATE UNIQUE INDEX IF NOT EXISTS metadata_indexes ON metadata (doc_index, field_index);' )
            self.db.executesql( 'CREATE INDEX IF NOT EXISTS metadata_doc_index ON metadata (doc_index);' )
            self.db.executesql( 'CREATE INDEX IF NOT EXISTS metadata_field_index ON metadata (field_index);' )

################################################################################

    def DefineCorpusTextSearch(self):
        """Drop and rebuild the fts3 'corpus_search' table from 'corpus' (isInit only)."""
        if self.isInit:
            self.db.executesql( 'DROP TABLE IF EXISTS corpus_search;' )
            self.db.executesql( 'CREATE VIRTUAL TABLE corpus_search USING fts3 (doc_content TEXT);' )
            self.db.executesql( 'INSERT INTO corpus_search (rowid, doc_content) SELECT doc_index, doc_content FROM corpus;' )

################################################################################

    def SanitizeText(self, text):
        """Replace every tab, CR, LF and form feed with a space, then strip both ends."""
        text = Corpus_DB.LINEBREAKS_TABS.sub(u' ', text).strip()
        return text

    @staticmethod
    def _NormalizeKeys(key, defaults):
        """
        Return the set of accepted header names, lower-cased.
        Header cells are lower-cased before lookup, so the accepted names must be
        lower-case too; otherwise a caller-supplied key such as 'DocID' never matches.
        """
        names = [key] if key is not None else defaults
        return frozenset(name.lower() for name in names)

    def ImportFromFile(self, filename):
        """
        filename = A plain-text file (utf-8 encoded) containing one document per line

        A line without a tab is the document content and gets the id 'doc<N>'
        (1-based); otherwise the first tab-separated value is the id and the
        second is the content.
        """
        def ReadFile():
            with open(filename, 'r') as f:
                for index, line in enumerate(f):
                    doc_index = index
                    values = line.decode('utf-8', 'ignore').rstrip('\n').split('\t')
                    if len(values) == 1:
                        doc_id = 'doc{}'.format(doc_index+1)
                        doc_content = values[0]
                    else:
                        doc_id = values[0]
                        doc_content = values[1]
                    yield {
                        'doc_index' : doc_index,
                        'doc_id' : doc_id,
                        'doc_content' : doc_content.encode('utf-8', 'ignore')
                    }
        self.db.corpus.bulk_insert(ReadFile())

    def ImportFromFolder(self, glob_pattern):
        """
        glob_pattern = A folder of files or a glob pattern for list of files (utf-8 encoded, one document per file)

        Files are taken in sorted order; the filename is used as the document id.
        """
        def ReadFolder():
            filenames = sorted(glob.glob(glob_pattern))
            for index, filename in enumerate(filenames):
                doc_index = index
                doc_id = filename
                with open(filename, 'r') as f:
                    doc_content = f.read().decode('utf-8', 'ignore')
                yield {
                    'doc_index' : doc_index,
                    'doc_id' : doc_id,
                    'doc_content' : doc_content.encode('utf-8', 'ignore')
                }
        self.db.corpus.bulk_insert(ReadFolder())

    def ImportFromSpreadsheet(self, filename, id_key = None, content_key = None, is_csv = False):
        """
        filename = A tab- or comma-separated spreadsheet (utf-8 encoded, with a header row containing column names)
        id_key = Name of the column containing unique document IDs (matched case-insensitively)
        content_key = Name of the column containing the document contents (matched case-insensitively)
        is_csv = Read with UnicodeReader when set, otherwise split each line on tabs

        Every other column becomes a metadata field. Its type starts as 'integer'
        and is demoted to 'double', then 'string', as soon as a value fails to parse.
        """
        doc_id_keys = Corpus_DB._NormalizeKeys(id_key, Corpus_DB.DOC_IDS)
        doc_content_keys = Corpus_DB._NormalizeKeys(content_key, Corpus_DB.DOC_CONTENTS)
        field_indexes = []
        field_names = []
        field_types = []
        metadata = []

        def ReadCSV():
            with open(filename, 'r') as f:
                reader = UnicodeReader(f)
                for row in reader:
                    yield row

        def ReadTSV():
            with open(filename, 'r') as f:
                for line in f:
                    yield line.decode('utf-8', 'ignore').rstrip('\n').split('\t')

        def ReadSpreadsheet(reader):
            field_doc_id = None
            field_doc_content = None
            for row_index, values in enumerate(reader):
                if row_index == 0:
                    # Header row: locate the id/content columns, register the rest as fields
                    for index, field in enumerate(values):
                        if field.lower() in doc_id_keys:
                            field_doc_id = index
                        elif field.lower() in doc_content_keys:
                            field_doc_content = index
                        else:
                            field_index = len(field_indexes)
                            field_indexes.append(field_index)
                            field_names.append(field)
                            field_types.append('integer')
                else:
                    doc_index = row_index - 1
                    doc_id = 'doc{:d}'.format(doc_index+1)
                    doc_content = ''
                    field_index = 0
                    for index, value in enumerate(values):
                        if field_doc_id == index:
                            doc_id = value
                        elif field_doc_content == index:
                            doc_content = value
                        else:
                            metadata.append({
                                'doc_index' : doc_index,
                                'field_index' : field_index,
                                'value' : value.encode('utf-8', 'ignore')
                            })

                            # [START] infer field type
                            field_type = field_types[field_index]
                            if field_type == 'integer':
                                try:
                                    int(value)
                                except ValueError:
                                    field_type = 'double'
                            if field_type == 'double':
                                try:
                                    float(value)
                                except ValueError:
                                    field_type = 'string'
                            field_types[field_index] = field_type
                            # [END] infer field type

                            field_index += 1
                    yield {
                        'doc_index' : doc_index,
                        'doc_id' : doc_id,
                        'doc_content' : doc_content.encode('utf-8', 'ignore')
                    }

        def GetFields():
            for field_index in field_indexes:
                field_name = field_names[field_index]
                field_type = field_types[field_index]
                yield {
                    'field_index' : field_index,
                    'field_name' : field_name,
                    'field_type' : field_type
                }

        reader = ReadCSV() if is_csv else ReadTSV()
        # The corpus insert must run first: consuming ReadSpreadsheet() is what
        # fills the field lists and the metadata rows used by the next two inserts.
        self.db.corpus.bulk_insert(ReadSpreadsheet(reader))
        self.db.fields.bulk_insert(GetFields())
        self.db.metadata.bulk_insert(metadata)

    def ExportToFile(self, filename):
        """
        filename = A tab-delimited file (utf-8 encoded, without header) containing docIDs and document contents

        Rows are written in doc_index order; contents go through SanitizeText().
        """
        def WriteFile(rows):
            with open(filename, 'w') as f:
                for row in rows:
                    doc_id = row.doc_id
                    doc_content = self.SanitizeText(row.doc_content.decode('utf-8'))
                    f.write(u'{}\t{}\n'.format(doc_id, doc_content).encode('utf-8', 'ignore'))

        rows = self.db().select(self.db.corpus.doc_id, self.db.corpus.doc_content, orderby = self.db.corpus.doc_index)
        WriteFile(rows)

    def ExportToSpreadsheet(self, filename, is_csv = False):
        """
        filename = A tab- or comma-separated spreadsheet (utf-8 encoded, with header) containing the text corpus and all metadata
        is_csv = Write with UnicodeWriter when set, otherwise join the values with tabs

        Columns are doc_id, doc_content, then the metadata fields in field_index
        order; a missing metadata value is written as an empty string.
        """
        field_names = [ row.field_name for row in self.db().select(self.db.fields.field_name, orderby = self.db.fields.field_index) ]
        field_count = len(field_names)
        all_field_names = [ 'doc_id', 'doc_content' ] + field_names

        def GetRowValues(row):
            # Shared by both writers: sanitized id/content followed by the metadata values
            doc_index = row.doc_index
            doc_id = row.doc_id
            doc_content = self.SanitizeText(row.doc_content.decode('utf-8'))
            values = [u''] * field_count
            for d in self.db(self.db.metadata.doc_index == doc_index).select(self.db.metadata.field_index, self.db.metadata.value):
                values[d.field_index] = self.SanitizeText(d.value.decode('utf-8'))
            return [ doc_id, doc_content ] + values

        def WriteCSV(rows):
            with open(filename, 'w') as f:
                writer = UnicodeWriter(f)
                writer.writerow(all_field_names)
                for row in rows:
                    writer.writerow(GetRowValues(row))

        def WriteTSV(rows):
            with open(filename, 'w') as f:
                f.write(u'{}\n'.format(u'\t'.join(all_field_names)).encode('utf-8', 'ignore'))
                for row in rows:
                    f.write(u'{}\n'.format(u'\t'.join(GetRowValues(row))).encode('utf-8', 'ignore'))

        rows = self.db().select(self.db.corpus.doc_index, self.db.corpus.doc_id, self.db.corpus.doc_content, orderby = self.db.corpus.doc_index)
        if is_csv:
            WriteCSV(rows)
        else:
            WriteTSV(rows)
class LDA_DB():
    """
    Storage for an LDA topic model in a SQLite file accessed through the DAL.

    Used as a context manager: entering defines the options, dimension, matrix
    and stats tables; leaving commits the connection.
    """

    FILENAME = 'lda.db'
    CONNECTION = 'sqlite://{}'.format(FILENAME)
    DEFAULT_OPTIONS = {
        'max_co_topic_count' : 10000   # Number of topic pairs to store
    }

    def __init__(self, path = None, isInit = False):
        """
        path = Folder passed to the DAL as 'folder' (left out when None)
        isInit = Turns on migration and index creation, turns off lazy tables
        """
        self.isInit = isInit
        settings = dict(lazy_tables = not self.isInit, migrate = self.isInit)
        if path is not None:
            settings['folder'] = path
        self.db = DAL(LDA_DB.CONNECTION, **settings)

    def __enter__(self):
        for define in (self.DefineOptionsTable,
                       self.DefineDimensionTables,
                       self.DefineMatrixTables,
                       self.DefineStatsTables):
            define()
        return self

    def __exit__(self, type, value, traceback):
        self.db.commit()

################################################################################

    def DefineOptionsTable(self):
        """Declare the 'options' table, then store each default option."""
        key_field = Field( 'key', 'string', required = True, unique = True )
        value_field = Field( 'value', 'string', required = True )
        self.db.define_table( 'options', key_field, value_field, migrate = self.isInit )
        for name, default in LDA_DB.DEFAULT_OPTIONS.iteritems():
            self.SetOption( name, default, overwrite = self.isInit )

    def SetOption(self, key, value, overwrite = True):
        """Add a missing option, replace an existing one only if overwrite; commit."""
        matching = self.db.options.key == key
        already_stored = self.db( matching ).count() > 0
        if not already_stored:
            self.db.options.insert( key = key, value = value )
        elif overwrite:
            self.db( matching ).update( value = value )
        self.db.commit()

    def GetOption(self, key):
        """Look up an option value; None when the key is not stored."""
        matching = self.db.options.key == key
        found = self.db( matching ).select( self.db.options.value ).first()
        return found.value if found else None

################################################################################

    def DefineDimensionTables(self):
        """Declare the 'terms', 'docs' and 'topics' tables (plus indexes on init)."""
        self.db.define_table( 'terms',
            Field( 'term_index', 'integer', required = True, unique = True, default = -1 ),
            Field( 'term_text', 'string', required = True, unique = True ),
            Field( 'term_freq', 'double', required = True ),
            Field( 'rank', 'integer', required = True ),
            migrate = self.isInit
        )
        if self.isInit:
            for statement in (
                'CREATE UNIQUE INDEX IF NOT EXISTS terms_index ON terms (term_index);',
                'CREATE UNIQUE INDEX IF NOT EXISTS terms_text ON terms (term_text);',
                'CREATE INDEX IF NOT EXISTS terms_freq ON terms (term_freq);',
                'CREATE INDEX IF NOT EXISTS terms_rank ON terms (rank);',
            ):
                self.db.executesql( statement )

        self.db.define_table( 'docs',
            Field( 'doc_index', 'integer', required = True, unique = True, default = -1 ),
            Field( 'doc_id', 'string', required = True, unique = True ),
            Field( 'doc_freq', 'double', required = True ),
            Field( 'rank', 'integer', required = True ),
            migrate = self.isInit
        )
        if self.isInit:
            for statement in (
                'CREATE UNIQUE INDEX IF NOT EXISTS docs_index ON docs (doc_index);',
                'CREATE UNIQUE INDEX IF NOT EXISTS docs_id ON docs (doc_id);',
                'CREATE INDEX IF NOT EXISTS docs_freq ON docs (doc_freq);',
                'CREATE INDEX IF NOT EXISTS docs_rank ON docs (rank);',
            ):
                self.db.executesql( statement )

        self.db.define_table( 'topics',
            Field( 'topic_index', 'integer', required = True, unique = True, default = -1 ),
            Field( 'topic_freq', 'double', required = True ),
            Field( 'topic_label', 'string', required = True ),
            Field( 'topic_desc', 'string', required = True ),
            Field( 'top_terms', 'list:integer', required = True ),
            Field( 'top_docs', 'list:integer', required = True ),
            Field( 'rank', 'integer', required = True ),
            migrate = self.isInit
        )
        if self.isInit:
            for statement in (
                'CREATE UNIQUE INDEX IF NOT EXISTS topics_index ON topics (topic_index);',
                'CREATE INDEX IF NOT EXISTS topics_freq ON topics (topic_freq);',
                'CREATE INDEX IF NOT EXISTS topics_rank ON topics (rank);',
            ):
                self.db.executesql( statement )

    def DefineMatrixTables(self):
        """Declare 'term_topic_matrix' and 'doc_topic_matrix' (plus indexes on init)."""
        self.db.define_table( 'term_topic_matrix',
            Field( 'term_index', 'integer', required = True, default = -1 ),
            Field( 'topic_index', 'integer', required = True, default = -1 ),
            Field( 'value', 'double', required = True ),
            Field( 'rank', 'integer', required = True ),
            migrate = self.isInit
        )
        if self.isInit:
            for statement in (
                'CREATE UNIQUE INDEX IF NOT EXISTS term_topic_indexes ON term_topic_matrix (term_index, topic_index);',
                'CREATE INDEX IF NOT EXISTS term_topic_value ON term_topic_matrix (value);',
                'CREATE INDEX IF NOT EXISTS term_topic_rank ON term_topic_matrix (rank);',
                'CREATE INDEX IF NOT EXISTS term_topic_termindex ON term_topic_matrix (term_index);',
                'CREATE INDEX IF NOT EXISTS term_topic_topicindex ON term_topic_matrix (topic_index);',
            ):
                self.db.executesql( statement )

        self.db.define_table( 'doc_topic_matrix',
            Field( 'doc_index', 'integer', required = True, default = -1 ),
            Field( 'topic_index', 'integer', required = True, default = -1 ),
            Field( 'value', 'double', required = True ),
            Field( 'rank', 'integer', required = True ),
            migrate = self.isInit
        )
        if self.isInit:
            for statement in (
                'CREATE UNIQUE INDEX IF NOT EXISTS doc_topic_indexes ON doc_topic_matrix (doc_index, topic_index);',
                'CREATE INDEX IF NOT EXISTS doc_topic_value ON doc_topic_matrix (value);',
                'CREATE INDEX IF NOT EXISTS doc_topic_rank ON doc_topic_matrix (rank);',
                'CREATE INDEX IF NOT EXISTS doc_topic_docindex ON doc_topic_matrix (doc_index);',
                'CREATE INDEX IF NOT EXISTS doc_topic_topicindex ON doc_topic_matrix (topic_index);',
            ):
                self.db.executesql( statement )

    def DefineStatsTables(self):
        """Declare the 'topic_covariance' table (plus indexes on init)."""
        self.db.define_table( 'topic_covariance',
            Field( 'first_topic_index', 'integer', required = True, default = -1 ),
            Field( 'second_topic_index', 'integer', required = True, default = -1 ),
            Field( 'value', 'double', required = True ),
            Field( 'rank', 'integer', required = True ),
            migrate = self.isInit
        )
        if self.isInit:
            for statement in (
                'CREATE UNIQUE INDEX IF NOT EXISTS topic_covariance_indexes ON topic_covariance (first_topic_index, second_topic_index);',
                'CREATE INDEX IF NOT EXISTS topic_covariance_value ON topic_covariance (value);',
                'CREATE INDEX IF NOT EXISTS topic_covariance_rank ON topic_covariance (rank);',
            ):
                self.db.executesql( statement )

################################################################################

    def Reset(self):
        """Delete every row from the dimension, matrix and stats tables."""
        for statement in (
            'DELETE FROM terms;',
            'DELETE FROM docs;',
            'DELETE FROM topics;',
            'DELETE FROM term_topic_matrix;',
            'DELETE FROM doc_topic_matrix;',
            'DELETE FROM topic_covariance;',
        ):
            self.db.executesql( statement )
from gluon.sql import DAL from gluon.sql import Field from gluon.sql import SQLDB from gluon.sqlhtml import SQLFORM from gluon.validators import * from gluon import fileutils from gluon.http import * from gluon.sqlhtml import * from gluon.tools import fetch import datetime from datetime import timedelta from datetime import date # configuration = AppConfig() db = DAL('sqlite://storage.sqlite') MIGRATE_SETTING = False db.define_table("admins", Field("administrator"), Field("admin_key", "password"), migrate=MIGRATE_SETTING) db.define_table("id_refs", Field("classroom_id_ref", default="0"), Field("quiz_id_ref", default="0"), Field("quiz_question_id_ref", default="0"), Field("student_id_ref", default="0"), Field("teacher_id_ref", default="0"), migrate=MIGRATE_SETTING) db.define_table("teachers",
class BOW_DB():
    """
    Bag-of-words statistics kept in a SQLite file accessed through the DAL.

    Used as a context manager: entering defines the options, term statistics,
    term co-occurrence, sentence co-occurrence and temporary vocabulary tables;
    leaving commits the connection.
    """

    FILENAME = 'bow.db'
    CONNECTION = 'sqlite://{}'.format(FILENAME)
    DEFAULT_OPTIONS = {
        'max_freq_count': 4000,        # Maximum number of terms to store
        'max_co_freq_count': 100000    # Maximum number of term pairs to store
    }

    def __init__(self, path=None, isInit=False):
        """
        path = Folder passed to the DAL as 'folder' (left out when None)
        isInit = Turns on migration and index creation, turns off lazy tables
        """
        self.isInit = isInit
        settings = dict(lazy_tables=not self.isInit, migrate_enabled=self.isInit)
        if path is not None:
            settings['folder'] = path
        self.db = DAL(BOW_DB.CONNECTION, **settings)

    def __enter__(self):
        for define in (self.DefineOptionsTable,
                       self.DefineTermStatsTables,
                       self.DefineTermCoStatsTables,
                       self.DefineSentenceCoStatsTables,
                       self.DefineTemporaryTable):
            define()
        return self

    def __exit__(self, type, value, traceback):
        self.db.commit()

    ############################################################################
    # Private helpers shared by the Define* methods

    def _CreateIndexes(self, *statements):
        """Run the given index statements in order, only when isInit is set."""
        if not self.isInit:
            return
        for statement in statements:
            self.db.executesql(statement)

    def _DefineTermValueTable(self, table_name):
        """Declare a table of (term_index, value, rank) rows."""
        self.db.define_table(
            table_name,
            Field('term_index', 'integer', required=True, unique=True, default=-1),
            Field('value', 'double', required=True),
            Field('rank', 'integer', required=True),
            migrate=self.isInit)

    def _DefineTermPairTable(self, table_name):
        """Declare a table of (first_term_index, second_term_index, value, rank) rows."""
        self.db.define_table(
            table_name,
            Field('first_term_index', 'integer', required=True, default=-1),
            Field('second_term_index', 'integer', required=True, default=-1),
            Field('value', 'double', required=True),
            Field('rank', 'integer', required=True),
            migrate=self.isInit)

    ############################################################################

    def DefineOptionsTable(self):
        """Declare the 'options' table, then store each default option."""
        key_field = Field('key', 'string', required=True, unique=True)
        value_field = Field('value', 'string', required=True)
        self.db.define_table('options', key_field, value_field, migrate=self.isInit)
        for name, default in BOW_DB.DEFAULT_OPTIONS.iteritems():
            self.SetOption(name, default, overwrite=self.isInit)

    def SetOption(self, key, value, overwrite=True):
        """Add a missing option, replace an existing one only if overwrite; commit."""
        matching = self.db.options.key == key
        already_stored = self.db(matching).count() > 0
        if not already_stored:
            self.db.options.insert(key=key, value=value)
        elif overwrite:
            self.db(matching).update(value=value)
        self.db.commit()

    def GetOption(self, key):
        """Look up an option value; None when the key is not stored."""
        matching = self.db.options.key == key
        found = self.db(matching).select(self.db.options.value).first()
        return found.value if found else None

    ############################################################################

    def DefineTermStatsTables(self):
        """Declare 'term_texts', 'term_freqs', 'term_probs' and 'term_doc_freqs'."""
        self.db.define_table(
            'term_texts',
            Field('term_index', 'integer', required=True, unique=True, default=-1),
            Field('term_text', 'string', required=True, unique=True),
            migrate=self.isInit)
        self._CreateIndexes(
            'CREATE INDEX IF NOT EXISTS term_text_value ON term_texts (term_text);')

        self._DefineTermValueTable('term_freqs')
        self._CreateIndexes(
            'CREATE INDEX IF NOT EXISTS term_freqs_value ON term_freqs (value);',
            'CREATE INDEX IF NOT EXISTS term_freqs_rank ON term_freqs (rank);')

        self._DefineTermValueTable('term_probs')
        self._CreateIndexes(
            'CREATE INDEX IF NOT EXISTS term_probs_value ON term_probs (value);',
            'CREATE INDEX IF NOT EXISTS term_probs_rank ON term_probs (rank);')

        self._DefineTermValueTable('term_doc_freqs')
        self._CreateIndexes(
            'CREATE INDEX IF NOT EXISTS term_doc_freqs_value ON term_doc_freqs (value);',
            'CREATE INDEX IF NOT EXISTS term_doc_freqs_rank ON term_doc_freqs (rank);')

    def DefineTermCoStatsTables(self):
        """Declare 'term_co_freqs', 'term_co_probs' and 'term_g2'."""
        self._DefineTermPairTable('term_co_freqs')
        self._CreateIndexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS term_co_freqs_indexes ON term_co_freqs (first_term_index, second_term_index);',
            'CREATE INDEX IF NOT EXISTS term_co_freqs_value ON term_co_freqs (value);',
            'CREATE INDEX IF NOT EXISTS term_co_freqs_rank ON term_co_freqs (rank);')

        self._DefineTermPairTable('term_co_probs')
        self._CreateIndexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS term_co_probs_indexes ON term_co_probs (first_term_index, second_term_index);',
            'CREATE INDEX IF NOT EXISTS term_co_probs_value ON term_co_probs (value);',
            'CREATE INDEX IF NOT EXISTS term_co_probs_rank ON term_co_probs (rank);')

        self._DefineTermPairTable('term_g2')
        self._CreateIndexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS term_g2_indexes ON term_g2 (first_term_index, second_term_index);',
            'CREATE INDEX IF NOT EXISTS term_g2_value ON term_g2 (value);',
            'CREATE INDEX IF NOT EXISTS term_g2_rank ON term_g2 (rank);')

    def DefineSentenceCoStatsTables(self):
        """Declare 'sentences_co_freqs', 'sentences_co_probs' and 'sentences_g2'."""
        self._DefineTermPairTable('sentences_co_freqs')
        self._CreateIndexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS sentences_co_freqs_indexes ON sentences_co_freqs (first_term_index, second_term_index);',
            'CREATE INDEX IF NOT EXISTS sentences_co_freqs_value ON sentences_co_freqs (value);',
            'CREATE INDEX IF NOT EXISTS sentences_co_freqs_rank ON sentences_co_freqs (rank);')

        self._DefineTermPairTable('sentences_co_probs')
        self._CreateIndexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS sentences_co_probs_indexes ON sentences_co_probs (first_term_index, second_term_index);',
            'CREATE INDEX IF NOT EXISTS sentences_co_probs_value ON sentences_co_probs (value);',
            'CREATE INDEX IF NOT EXISTS sentences_co_probs_rank ON sentences_co_probs (rank);')

        self._DefineTermPairTable('sentences_g2')
        self._CreateIndexes(
            'CREATE UNIQUE INDEX IF NOT EXISTS sentences_g2_indexes ON sentences_g2 (first_term_index, second_term_index);',
            'CREATE INDEX IF NOT EXISTS sentences_g2_value ON sentences_g2 (value);',
            'CREATE INDEX IF NOT EXISTS sentences_g2_rank ON sentences_g2 (rank);')

    ############################################################################

    def Reset(self):
        """Delete every row from the term and sentence statistics tables."""
        for statement in (
            'DELETE FROM term_texts;',
            'DELETE FROM term_freqs;',
            'DELETE FROM term_probs;',
            'DELETE FROM term_doc_freqs;',
            'DELETE FROM term_co_freqs;',
            'DELETE FROM term_co_probs;',
            'DELETE FROM term_g2;',
            'DELETE FROM sentences_co_freqs;',
            'DELETE FROM sentences_co_probs;',
            'DELETE FROM sentences_g2;',
        ):
            self.db.executesql(statement)

    ############################################################################

    def DefineTemporaryTable(self):
        """Declare the 'vocab' and 'vocab_text' tables."""
        self.db.define_table(
            'vocab',
            Field('term_index', 'integer', required=True, unique=True, default=-1),
            Field('term_text', 'string', required=True),
            migrate=self.isInit)
        self.db.define_table(
            'vocab_text',
            Field('term_text', 'string', required=True, unique=True, default=-1),
            migrate=self.isInit)
def test_DAL_person_table():
    """
    Smoke test: connect to the postgres database, define a 'person' table,
    insert one row and commit. Passes when none of these steps raises.
    """
    conn_str = "postgres://*****:*****@tornado/joseph"
    db = DAL(conn_str, check_reserved=['postgres'])
    try:
        db.define_table('person', Field('name'))
        db.person.insert(name="Alex")
        db.commit()
    finally:
        # Release the connection even when one of the steps above fails,
        # so repeated test runs do not leave connections open.
        db.close()
class Corpus_DB():
    """
    Wrapper around a DAL connection to the corpus database ('corpus.db').

    Intended to be used as a context manager: entering defines every table on
    ``self.db`` (and empties the derived statistics tables when ``isReset`` is
    set); exiting rebuilds the full-text search table (import mode only) and
    commits.

    path     = Folder passed to DAL as ``folder`` (omitted when None)
    isInit   = Migrate the term statistics/temporary tables and write DEFAULT_OPTIONS
    isImport = Migrate the options, models, corpus and metadata tables
    isReset  = Delete all rows of the derived statistics tables on entry
    """

    FILENAME = 'corpus.db'
    CONNECTION = 'sqlite://{}'.format(FILENAME)
    DOC_IDS = ['doc_id', 'docid']
    DOC_CONTENTS = ['doc_content', 'doccontent', 'docbody', 'doc_body']
    DEFAULT_OPTIONS = {
        'token_regex': r'\w{3,}',
        'min_freq': 5,
        'min_doc_freq': 3,
        'max_freq_count': 4000,
        'max_co_freq_count': 160000,
        'max_g2_count': 160000
    }

    # Tables sharing the (term_index, value, rank) layout.
    _TERM_STATS_TABLES = ('term_freqs', 'term_probs', 'term_doc_freqs')
    # Tables sharing the (first_term_index, second_term_index, value, rank) layout.
    _TERM_CO_STATS_TABLES = ('term_co_freqs', 'term_co_probs', 'term_g2')
    _SENTENCE_CO_STATS_TABLES = ('sentences_co_freqs', 'sentences_co_probs', 'sentences_g2')
    # Everything Reset() empties, in the order the rows are deleted.
    _DERIVED_TABLES = ('term_texts',) + _TERM_STATS_TABLES + _TERM_CO_STATS_TABLES + _SENTENCE_CO_STATS_TABLES

    def __init__(self, path = None, isInit = False, isImport = False, isReset = False):
        self.isInit = isInit
        self.isImport = isImport
        self.isReset = isReset
        isInitOrImport = self.isInit or self.isImport
        # Tables are lazy and migration is disabled unless we are initializing or importing.
        dal_options = {
            'lazy_tables': not isInitOrImport,
            'migrate_enabled': isInitOrImport
        }
        if path is not None:
            dal_options['folder'] = path
        self.db = DAL(Corpus_DB.CONNECTION, **dal_options)

    def __enter__(self):
        self.DefineOptionsTable()
        self.DefineModelsTable()
        self.DefineCorpusTable()
        self.DefineMetadataTables()
        # Reset() issues raw SQL, so it runs before the statistics tables are (re)defined.
        if self.isReset:
            self.Reset()
        self.DefineTermStatsTables()
        self.DefineTermCoStatsTables()
        self.DefineSentenceCoStatsTables()
        self.DefineTemporaryTable()
        return self

    def __exit__(self, type, value, traceback):
        # NOTE(review): this also runs (and commits) when the with-body raised -- confirm that is intended.
        self.DefineCorpusTextSearch()
        self.db.commit()

################################################################################

    @staticmethod
    def _NormalizeKeys(key, defaults):
        """
        Return the lower-cased set of column names to match header cells against.

        key      = Caller-supplied column name, or None to fall back to ``defaults``
        defaults = Column names used when ``key`` is None

        Header cells are lower-cased before the lookup, so the keys must be too;
        otherwise a key such as 'DocID' could never match.
        """
        names = [key] if key is not None else defaults
        return frozenset(name.lower() for name in names)

    @staticmethod
    def _SplitLine(line):
        """
        Decode one raw utf-8 line and split it into tab-separated cells.

        Both '\\n' and '\\r\\n' line endings are removed so that the last cell of
        a CRLF file does not keep a trailing carriage return.
        """
        return line.decode('utf-8', 'ignore').rstrip('\r\n').split('\t')

    @staticmethod
    def _ParseDocumentLine(line, doc_index):
        """
        Turn one raw line of a plain-text corpus file into a 'corpus' row dict.

        A line without a tab is all content and receives a generated id
        ('doc1', 'doc2', ...). Otherwise the text before the first tab is the
        document id and everything after it -- including any further tabs --
        is the content.
        """
        values = line.decode('utf-8', 'ignore').rstrip('\r\n').split('\t', 1)
        if len(values) == 1:
            doc_id = 'doc{}'.format(doc_index + 1)
            doc_content = values[0]
        else:
            doc_id, doc_content = values
        return {
            'doc_index': doc_index,
            'doc_id': doc_id,
            'doc_content': doc_content.encode('utf-8')
        }

    @staticmethod
    def _InferFieldType(field_type, value):
        """
        Widen ``field_type`` ('integer' -> 'double' -> 'string') until ``value`` parses.

        A type is never narrowed: once a column is 'string' it stays 'string'.
        """
        if field_type == 'integer':
            try:
                int(value)
            except ValueError:
                field_type = 'double'
        if field_type == 'double':
            try:
                float(value)
            except ValueError:
                field_type = 'string'
        return field_type

################################################################################

    def DefineOptionsTable(self):
        """Define the key/value 'options' table; in init mode (re)write DEFAULT_OPTIONS."""
        self.db.define_table('options',
            Field('key', 'string', required=True, unique=True),
            Field('value', 'string', required=True),
            migrate=self.isImport
        )
        if self.isInit:
            for key, value in Corpus_DB.DEFAULT_OPTIONS.iteritems():
                self.SetOption(key, value)

    def SetOption(self, key, value):
        """Insert the option ``key`` or overwrite its value if it already exists."""
        keyValue = self.db(self.db.options.key == key).select().first()
        if keyValue:
            keyValue.update_record(value=value)
        else:
            self.db.options.insert(key=key, value=value)

    def GetOption(self, key):
        """Return the stored value for ``key``, or None when the option is absent."""
        keyValue = self.db(self.db.options.key == key).select(self.db.options.value).first()
        if keyValue:
            return keyValue.value
        else:
            return None

    def DefineModelsTable(self):
        """Define the 'models' table (model_key -> model_desc)."""
        self.db.define_table('models',
            Field('model_key', 'string', required=True, unique=True),
            Field('model_desc', 'string', required=True),
            migrate=self.isImport
        )

    def AddModel(self, model_key, model_desc):
        """Insert a model entry or update the description of an existing one."""
        keyDesc = self.db(self.db.models.model_key == model_key).select().first()
        if keyDesc:
            keyDesc.update_record(model_desc=model_desc)
        else:
            self.db.models.insert(model_key=model_key, model_desc=model_desc)

    def GetModels(self):
        """Return the list of all stored model keys."""
        models = self.db(self.db.models).select(self.db.models.model_key)
        return [model.model_key for model in models]

    def GetModelDescription(self, model_key):
        """Return the description stored for ``model_key``, or None when it is unknown."""
        model = self.db(self.db.models.model_key == model_key).select().first()
        if model:
            return model.model_desc
        else:
            return None

################################################################################

    def DefineCorpusTable(self):
        """Define the 'corpus' table; in import mode also create its unique indexes."""
        self.db.define_table('corpus',
            Field('doc_index', 'integer', required=True, unique=True, default=-1),
            Field('doc_id', 'string', required=True, unique=True),
            Field('doc_content', 'text', required=True),
            migrate=self.isImport
        )
        if self.isImport:
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS corpus_doc_index ON corpus (doc_index);')
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS corpus_doc_id ON corpus (doc_id);')

    def DefineMetadataTables(self):
        """Define the 'fields' and 'metadata' tables; in import mode also create their indexes."""
        self.db.define_table('fields',
            Field('field_index', 'integer', required=True, unique=True, default=-1),
            Field('field_name', 'string', required=True, unique=True),
            Field('field_type', 'string', required=True),
            migrate=self.isImport
        )
        if self.isImport:
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS field_index ON fields (field_index);')
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS field_name ON fields (field_name);')
            self.db.executesql('CREATE INDEX IF NOT EXISTS field_type ON fields (field_type);')

        self.db.define_table('metadata',
            Field('doc_index', 'integer', required=True, default=-1),
            Field('field_index', 'integer', required=True, default=-1),
            Field('value', 'text', required=True),
            migrate=self.isImport
        )
        if self.isImport:
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS metadata_indexes ON metadata (doc_index, field_index);')
            self.db.executesql('CREATE INDEX IF NOT EXISTS metadata_doc_index ON metadata (doc_index);')
            self.db.executesql('CREATE INDEX IF NOT EXISTS metadata_field_index ON metadata (field_index);')

################################################################################

    def DefineCorpusTextSearch(self):
        """In import mode, rebuild the fts3 'corpus_search' table from the 'corpus' table."""
        if self.isImport:
            self.db.executesql('DROP TABLE IF EXISTS corpus_search;')
            self.db.executesql('CREATE VIRTUAL TABLE corpus_search USING fts3 (doc_content TEXT);')
            self.db.executesql('INSERT INTO corpus_search (rowid, doc_content) SELECT doc_index, doc_content FROM corpus;')

################################################################################

    def ImportFromFile(self, filename):
        """
        filename = A plain-text file (utf-8 encoded) containing one document per line

        A line may optionally start with a document id followed by a tab; see
        _ParseDocumentLine for the exact rules.
        """
        def ReadFile():
            with open(filename, 'r') as f:
                for doc_index, line in enumerate(f):
                    yield Corpus_DB._ParseDocumentLine(line, doc_index)
        self.db.corpus.bulk_insert(ReadFile())

    def ImportFromFolder(self, glob_pattern):
        """
        glob_pattern = A glob pattern for a list of files (utf-8 encoded, one document per file)

        The pattern is handed to glob.glob() unchanged; the matched paths are
        sorted and each path doubles as the document id.
        """
        def ReadFolder():
            filenames = sorted(glob.glob(glob_pattern))
            for doc_index, filename in enumerate(filenames):
                with open(filename, 'r') as f:
                    doc_content = f.read().decode('utf-8', 'ignore')
                yield {
                    'doc_index': doc_index,
                    'doc_id': filename,
                    'doc_content': doc_content.encode('utf-8')
                }
        self.db.corpus.bulk_insert(ReadFolder())

    def ImportFromSpreadsheet(self, filename, id_key = None, content_key = None, is_csv = False):
        """
        filename = A tab- or comma-separated spreadsheet (utf-8 encoded, with a header row containing column names)
        id_key = Name of the column containing unique document IDs
        content_key = Name of the column containing the document contents
        is_csv = Read the file as comma-separated (via UnicodeReader) instead of tab-separated

        Column names are matched case-insensitively. Every other column becomes
        a metadata field whose type ('integer', 'double' or 'string') is
        inferred from its values.
        """
        doc_id_keys = Corpus_DB._NormalizeKeys(id_key, Corpus_DB.DOC_IDS)
        doc_content_keys = Corpus_DB._NormalizeKeys(content_key, Corpus_DB.DOC_CONTENTS)
        field_indexes = []
        field_names = []
        field_types = []
        metadata = []

        def ReadCSV():
            with open(filename, 'r') as f:
                reader = UnicodeReader(f)
                for row in reader:
                    yield row

        def ReadTSV():
            with open(filename, 'r') as f:
                for line in f:
                    yield Corpus_DB._SplitLine(line)

        def ReadSpreadsheet(reader):
            field_doc_id = None
            field_doc_content = None
            for row_index, values in enumerate(reader):
                if row_index == 0:
                    # Header row: locate the id/content columns, register the rest as metadata fields.
                    for index, field in enumerate(values):
                        if field.lower() in doc_id_keys:
                            field_doc_id = index
                        elif field.lower() in doc_content_keys:
                            field_doc_content = index
                        else:
                            field_indexes.append(len(field_indexes))
                            field_names.append(field)
                            field_types.append('integer')
                    continue

                doc_index = row_index - 1
                doc_id = 'doc{:d}'.format(doc_index + 1)
                doc_content = ''
                field_index = 0
                for index, value in enumerate(values):
                    if field_doc_id == index:
                        doc_id = value
                    elif field_doc_content == index:
                        doc_content = value
                    else:
                        metadata.append({
                            'doc_index': doc_index,
                            'field_index': field_index,
                            'value': value.encode('utf-8')
                        })
                        field_types[field_index] = Corpus_DB._InferFieldType(field_types[field_index], value)
                        field_index += 1
                yield {
                    'doc_index': doc_index,
                    'doc_id': doc_id,
                    'doc_content': doc_content.encode('utf-8')
                }

        def GetFields():
            for field_index in field_indexes:
                yield {
                    'field_index': field_index,
                    'field_name': field_names[field_index],
                    'field_type': field_types[field_index]
                }

        reader = ReadCSV() if is_csv else ReadTSV()
        # The corpus insert must run first: consuming ReadSpreadsheet() is what
        # fills the field lists and the metadata rows used by the next two inserts.
        self.db.corpus.bulk_insert(ReadSpreadsheet(reader))
        self.db.fields.bulk_insert(GetFields())
        self.db.metadata.bulk_insert(metadata)

    def ExportToFile(self, filename):
        """
        filename = A tab-delimited file (utf-8 encoded, without header) containing docIDs and document contents

        Runs of whitespace inside a document are collapsed to a single space so
        that every document stays on one line.
        """
        def WriteFile(rows):
            m = re.compile(r'\s+')
            with open(filename, 'w') as f:
                for row in rows:
                    doc_id = row.doc_id
                    doc_content = m.sub(u' ', row.doc_content.decode('utf-8'))
                    f.write(u'{}\t{}\n'.format(doc_id, doc_content).encode('utf-8'))

        rows = self.db().select(self.db.corpus.doc_id, self.db.corpus.doc_content, orderby=self.db.corpus.doc_index)
        WriteFile(rows)

    def ExportToSpreadsheet(self, filename, is_csv = False):
        """
        filename = A tab- or comma-separated spreadsheet (utf-8 encoded, with header) containing the text corpus and all metadata
        is_csv = Write comma-separated output (via UnicodeWriter) instead of tab-separated
        """
        field_names = [row.field_name for row in self.db().select(self.db.fields.field_name, orderby=self.db.fields.field_index)]
        field_count = len(field_names)
        all_field_names = ['doc_id', 'doc_content'] + field_names
        m = re.compile(r'\s+')

        def GetRecords(rows):
            # One output record per document: id, content, then one cell per metadata field
            # (empty when the document has no value for that field).
            for row in rows:
                doc_index = row.doc_index
                doc_id = row.doc_id
                doc_content = m.sub(u' ', row.doc_content.decode('utf-8'))
                values = [u''] * field_count
                for d in self.db(self.db.metadata.doc_index == doc_index).select(self.db.metadata.field_index, self.db.metadata.value):
                    values[d.field_index] = m.sub(u' ', d.value.decode('utf-8'))
                yield [doc_id, doc_content] + values

        def WriteCSV(rows):
            with open(filename, 'w') as f:
                writer = UnicodeWriter(f)
                writer.writerow(all_field_names)
                for all_values in GetRecords(rows):
                    writer.writerow(all_values)

        def WriteTSV(rows):
            with open(filename, 'w') as f:
                f.write(u'{}\n'.format(u'\t'.join(all_field_names)).encode('utf-8'))
                for all_values in GetRecords(rows):
                    f.write(u'{}\n'.format(u'\t'.join(all_values)).encode('utf-8'))

        rows = self.db().select(self.db.corpus.doc_index, self.db.corpus.doc_id, self.db.corpus.doc_content, orderby=self.db.corpus.doc_index)
        if is_csv:
            WriteCSV(rows)
        else:
            WriteTSV(rows)

################################################################################

    def _CreateValueRankIndexes(self, table_name):
        """Create the '<table>_value' and '<table>_rank' indexes if they do not exist yet."""
        self.db.executesql('CREATE INDEX IF NOT EXISTS {0}_value ON {0} (value);'.format(table_name))
        self.db.executesql('CREATE INDEX IF NOT EXISTS {0}_rank ON {0} (rank);'.format(table_name))

    def _DefineTermValueTable(self, table_name):
        """Define a (term_index, value, rank) table; in init mode also create its indexes."""
        self.db.define_table(table_name,
            Field('term_index', 'integer', required=True, unique=True, default=-1),
            Field('value', 'double', required=True),
            Field('rank', 'integer', required=True),
            migrate=self.isInit
        )
        if self.isInit:
            self._CreateValueRankIndexes(table_name)

    def _DefineTermPairTable(self, table_name):
        """Define a (first_term_index, second_term_index, value, rank) table; in init mode also create its indexes."""
        self.db.define_table(table_name,
            Field('first_term_index', 'integer', required=True, default=-1),
            Field('second_term_index', 'integer', required=True, default=-1),
            Field('value', 'double', required=True),
            Field('rank', 'integer', required=True),
            migrate=self.isInit
        )
        if self.isInit:
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS {0}_indexes ON {0} (first_term_index, second_term_index);'.format(table_name))
            self._CreateValueRankIndexes(table_name)

    def DefineTermStatsTables(self):
        """Define 'term_texts' and the per-term statistics tables (freqs, probs, doc freqs)."""
        self.db.define_table('term_texts',
            Field('term_index', 'integer', required=True, unique=True, default=-1),
            Field('term_text', 'string', required=True, unique=True),
            migrate=self.isInit
        )
        if self.isInit:
            self.db.executesql('CREATE INDEX IF NOT EXISTS term_text_value ON term_texts (term_text);')
        for table_name in Corpus_DB._TERM_STATS_TABLES:
            self._DefineTermValueTable(table_name)

    def DefineTermCoStatsTables(self):
        """Define the term-pair statistics tables (co-freqs, co-probs, g2)."""
        for table_name in Corpus_DB._TERM_CO_STATS_TABLES:
            self._DefineTermPairTable(table_name)

    def DefineSentenceCoStatsTables(self):
        """Define the sentence-level term-pair statistics tables (co-freqs, co-probs, g2)."""
        for table_name in Corpus_DB._SENTENCE_CO_STATS_TABLES:
            self._DefineTermPairTable(table_name)

################################################################################

    def Reset(self):
        """Delete every row of the derived term and sentence statistics tables."""
        for table_name in Corpus_DB._DERIVED_TABLES:
            self.db.executesql('DELETE FROM {};'.format(table_name))

################################################################################

    def DefineTemporaryTable(self):
        """Define the 'vocab' and 'vocab_text' tables."""
        self.db.define_table('vocab',
            Field('term_index', 'integer', required=True, unique=True, default=-1),
            Field('term_text', 'string', required=True),
            migrate=self.isInit
        )
        # NOTE(review): default=-1 on a string field looks copied from the integer columns -- confirm before changing the schema.
        self.db.define_table('vocab_text',
            Field('term_text', 'string', required=True, unique=True, default=-1),
            migrate=self.isInit
        )
class Corpus_DB():
    """
    Wrapper around a DAL connection to the corpus database ('corpus.db').

    Intended to be used as a context manager: entering defines the options,
    models, corpus and metadata tables on ``self.db``; exiting commits.

    path   = Folder passed to DAL as ``folder`` (omitted when None)
    isInit = Enable table migration, create the indexes and overwrite stored
             options with DEFAULT_OPTIONS
    """

    FILENAME = 'corpus.db'
    CONNECTION = 'sqlite://{}'.format(FILENAME)
    DOC_IDS = ['doc_id', 'docid']
    DOC_CONTENTS = ['doc_content', 'doccontent', 'docbody', 'doc_body']
    MODEL_KEY = 'corpus'
    MODEL_DESC = 'Text Corpus'
    # Built-in entry describing the corpus itself; it is never stored in the 'models' table.
    MODEL_ENTRY = {
        'model_key': MODEL_KEY,
        'model_desc': MODEL_DESC
    }
    LINEBREAKS_TABS = re.compile(r'[\t\r\n\f]')
    DEFAULT_OPTIONS = {
        'token_regex': r'\w{3,}',  # Tokenize a corpus into a bag-of-words language model
        'min_freq': 5,             # Number of times a term must appear in the corpus
        'min_doc_freq': 3          # Number of documents in which a term must appear
    }

    def __init__(self, path = None, isInit = False):
        self.isInit = isInit
        # Tables are lazy and migration is disabled unless we are initializing.
        dal_options = {
            'lazy_tables': not self.isInit,
            'migrate_enabled': self.isInit
        }
        if path is not None:
            dal_options['folder'] = path
        self.db = DAL(Corpus_DB.CONNECTION, **dal_options)

    def __enter__(self):
        self.DefineOptionsTable()
        self.DefineModelsTable()
        self.DefineCorpusTable()
        self.DefineMetadataTables()
        return self

    def __exit__(self, type, value, traceback):
        # NOTE(review): this also runs (and commits) when the with-body raised -- confirm that is intended.
        self.DefineCorpusTextSearch()
        self.db.commit()

################################################################################

    @staticmethod
    def _NormalizeKeys(key, defaults):
        """
        Return the lower-cased set of column names to match header cells against.

        key      = Caller-supplied column name, or None to fall back to ``defaults``
        defaults = Column names used when ``key`` is None

        Header cells are lower-cased before the lookup, so the keys must be too;
        otherwise a key such as 'DocID' could never match.
        """
        names = [key] if key is not None else defaults
        return frozenset(name.lower() for name in names)

    @staticmethod
    def _SplitLine(line):
        """
        Decode one raw utf-8 line and split it into tab-separated cells.

        Both '\\n' and '\\r\\n' line endings are removed so that the last cell of
        a CRLF file does not keep a trailing carriage return.
        """
        return line.decode('utf-8', 'ignore').rstrip('\r\n').split('\t')

    @staticmethod
    def _ParseDocumentLine(line, doc_index):
        """
        Turn one raw line of a plain-text corpus file into a 'corpus' row dict.

        A line without a tab is all content and receives a generated id
        ('doc1', 'doc2', ...). Otherwise the text before the first tab is the
        document id and everything after it -- including any further tabs --
        is the content.
        """
        values = line.decode('utf-8', 'ignore').rstrip('\r\n').split('\t', 1)
        if len(values) == 1:
            doc_id = 'doc{}'.format(doc_index + 1)
            doc_content = values[0]
        else:
            doc_id, doc_content = values
        return {
            'doc_index': doc_index,
            'doc_id': doc_id,
            'doc_content': doc_content.encode('utf-8', 'ignore')
        }

    @staticmethod
    def _InferFieldType(field_type, value):
        """
        Widen ``field_type`` ('integer' -> 'double' -> 'string') until ``value`` parses.

        A type is never narrowed: once a column is 'string' it stays 'string'.
        """
        if field_type == 'integer':
            try:
                int(value)
            except ValueError:
                field_type = 'double'
        if field_type == 'double':
            try:
                float(value)
            except ValueError:
                field_type = 'string'
        return field_type

################################################################################

    def DefineOptionsTable(self):
        """
        Define the key/value 'options' table and make sure every default option exists.

        Existing values are only overwritten with the defaults in init mode.
        """
        self.db.define_table('options',
            Field('key', 'string', required=True, unique=True),
            Field('value', 'string', required=True),
            migrate=self.isInit
        )
        for key, value in Corpus_DB.DEFAULT_OPTIONS.iteritems():
            self.SetOption(key, value, overwrite=self.isInit)

    def SetOption(self, key, value, overwrite = True):
        """
        Store ``value`` under ``key`` and commit.

        overwrite = When False, an already existing option keeps its current value
        """
        where = self.db.options.key == key
        if self.db(where).count() > 0:
            if overwrite:
                self.db(where).update(value=value)
        else:
            self.db.options.insert(key=key, value=value)
        self.db.commit()

    def GetOption(self, key):
        """Return the stored value for ``key``, or None when the option is absent."""
        where = self.db.options.key == key
        keyValue = self.db(where).select(self.db.options.value).first()
        if keyValue:
            return keyValue.value
        else:
            return None

    def DefineModelsTable(self):
        """Define the 'models' table (model_key -> model_desc)."""
        self.db.define_table('models',
            Field('model_key', 'string', required=True, unique=True),
            Field('model_desc', 'string', required=True),
            migrate=self.isInit
        )

    def AddModel(self, model_key, model_desc):
        """Insert a model entry, or update the description of an existing one, and commit."""
        where = self.db.models.model_key == model_key
        if self.db(where).count() > 0:
            self.db(where).update(model_desc=model_desc)
        else:
            self.db.models.insert(model_key=model_key, model_desc=model_desc)
        self.db.commit()

    def GetModel(self, model_key):
        """
        Return {'model_key', 'model_desc'} for ``model_key``, or None when it is unknown.

        The built-in corpus key is answered from MODEL_ENTRY without touching the database.
        """
        if model_key == Corpus_DB.MODEL_KEY:
            return Corpus_DB.MODEL_ENTRY
        where = self.db.models.model_key == model_key
        keyValue = self.db(where).select(self.db.models.ALL).first()
        if keyValue:
            return {
                'model_key': keyValue.model_key,
                'model_desc': keyValue.model_desc
            }
        else:
            return None

    def GetModels(self):
        """Return the built-in corpus entry followed by every stored model entry."""
        rows = self.db(self.db.models).select(self.db.models.model_key, self.db.models.model_desc).as_list()
        return [Corpus_DB.MODEL_ENTRY] + rows

################################################################################

    def DefineCorpusTable(self):
        """Define the 'corpus' table; in init mode also create its unique indexes."""
        self.db.define_table('corpus',
            Field('doc_index', 'integer', required=True, unique=True, default=-1),
            Field('doc_id', 'string', required=True, unique=True),
            Field('doc_content', 'text', required=True),
            migrate=self.isInit
        )
        if self.isInit:
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS corpus_doc_index ON corpus (doc_index);')
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS corpus_doc_id ON corpus (doc_id);')

    def DefineMetadataTables(self):
        """Define the 'fields' and 'metadata' tables; in init mode also create their indexes."""
        self.db.define_table('fields',
            Field('field_index', 'integer', required=True, unique=True, default=-1),
            Field('field_name', 'string', required=True, unique=True),
            Field('field_type', 'string', required=True),
            migrate=self.isInit
        )
        if self.isInit:
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS field_index ON fields (field_index);')
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS field_name ON fields (field_name);')
            self.db.executesql('CREATE INDEX IF NOT EXISTS field_type ON fields (field_type);')

        self.db.define_table('metadata',
            Field('doc_index', 'integer', required=True, default=-1),
            Field('field_index', 'integer', required=True, default=-1),
            Field('value', 'text', required=True),
            migrate=self.isInit
        )
        if self.isInit:
            self.db.executesql('CREATE UNIQUE INDEX IF NOT EXISTS metadata_indexes ON metadata (doc_index, field_index);')
            self.db.executesql('CREATE INDEX IF NOT EXISTS metadata_doc_index ON metadata (doc_index);')
            self.db.executesql('CREATE INDEX IF NOT EXISTS metadata_field_index ON metadata (field_index);')

################################################################################

    def DefineCorpusTextSearch(self):
        """Currently a no-op: building the fts3 'corpus_search' table is disabled."""
        if self.isInit:
            pass
#            self.db.executesql( 'DROP TABLE IF EXISTS corpus_search;' )
#            self.db.executesql( 'CREATE VIRTUAL TABLE corpus_search USING fts3 (doc_content TEXT);' )
#            self.db.executesql( 'INSERT INTO corpus_search (rowid, doc_content) SELECT doc_index, doc_content FROM corpus;' )

################################################################################

    def SanitizeText(self, text):
        """Replace every tab, carriage return, line feed and form feed with a space, then strip both ends."""
        text = Corpus_DB.LINEBREAKS_TABS.sub(u' ', text).strip()
        return text

    def ImportFromFile(self, filename):
        """
        filename = A plain-text file (utf-8 encoded) containing one document per line

        A line may optionally start with a document id followed by a tab; see
        _ParseDocumentLine for the exact rules.
        """
        def ReadFile():
            with open(filename, 'r') as f:
                for doc_index, line in enumerate(f):
                    yield Corpus_DB._ParseDocumentLine(line, doc_index)
        self.db.corpus.bulk_insert(ReadFile())

    def ImportFromFolder(self, glob_pattern):
        """
        glob_pattern = A glob pattern for a list of files (utf-8 encoded, one document per file)

        The pattern is handed to glob.glob() unchanged; the matched paths are
        sorted and each path doubles as the document id.
        """
        def ReadFolder():
            filenames = sorted(glob.glob(glob_pattern))
            for doc_index, filename in enumerate(filenames):
                with open(filename, 'r') as f:
                    doc_content = f.read().decode('utf-8', 'ignore')
                yield {
                    'doc_index': doc_index,
                    'doc_id': filename,
                    'doc_content': doc_content.encode('utf-8', 'ignore')
                }
        self.db.corpus.bulk_insert(ReadFolder())

    def ImportFromSpreadsheet(self, filename, id_key = None, content_key = None, is_csv = False):
        """
        filename = A tab- or comma-separated spreadsheet (utf-8 encoded, with a header row containing column names)
        id_key = Name of the column containing unique document IDs
        content_key = Name of the column containing the document contents
        is_csv = Read the file as comma-separated (via UnicodeReader) instead of tab-separated

        Column names are matched case-insensitively. Every other column becomes
        a metadata field whose type ('integer', 'double' or 'string') is
        inferred from its values.
        """
        doc_id_keys = Corpus_DB._NormalizeKeys(id_key, Corpus_DB.DOC_IDS)
        doc_content_keys = Corpus_DB._NormalizeKeys(content_key, Corpus_DB.DOC_CONTENTS)
        field_indexes = []
        field_names = []
        field_types = []
        metadata = []

        def ReadCSV():
            with open(filename, 'r') as f:
                reader = UnicodeReader(f)
                for row in reader:
                    yield row

        def ReadTSV():
            with open(filename, 'r') as f:
                for line in f:
                    yield Corpus_DB._SplitLine(line)

        def ReadSpreadsheet(reader):
            field_doc_id = None
            field_doc_content = None
            for row_index, values in enumerate(reader):
                if row_index == 0:
                    # Header row: locate the id/content columns, register the rest as metadata fields.
                    for index, field in enumerate(values):
                        if field.lower() in doc_id_keys:
                            field_doc_id = index
                        elif field.lower() in doc_content_keys:
                            field_doc_content = index
                        else:
                            field_indexes.append(len(field_indexes))
                            field_names.append(field)
                            field_types.append('integer')
                    continue

                doc_index = row_index - 1
                doc_id = 'doc{:d}'.format(doc_index + 1)
                doc_content = ''
                field_index = 0
                for index, value in enumerate(values):
                    if field_doc_id == index:
                        doc_id = value
                    elif field_doc_content == index:
                        doc_content = value
                    else:
                        metadata.append({
                            'doc_index': doc_index,
                            'field_index': field_index,
                            'value': value.encode('utf-8', 'ignore')
                        })
                        field_types[field_index] = Corpus_DB._InferFieldType(field_types[field_index], value)
                        field_index += 1
                yield {
                    'doc_index': doc_index,
                    'doc_id': doc_id,
                    'doc_content': doc_content.encode('utf-8', 'ignore')
                }

        def GetFields():
            for field_index in field_indexes:
                yield {
                    'field_index': field_index,
                    'field_name': field_names[field_index],
                    'field_type': field_types[field_index]
                }

        reader = ReadCSV() if is_csv else ReadTSV()
        # The corpus insert must run first: consuming ReadSpreadsheet() is what
        # fills the field lists and the metadata rows used by the next two inserts.
        self.db.corpus.bulk_insert(ReadSpreadsheet(reader))
        self.db.fields.bulk_insert(GetFields())
        self.db.metadata.bulk_insert(metadata)

    def ExportToFile(self, filename):
        """
        filename = A tab-delimited file (utf-8 encoded, without header) containing docIDs and document contents
        """
        def WriteFile(rows):
            with open(filename, 'w') as f:
                for row in rows:
                    doc_id = row.doc_id
                    doc_content = self.SanitizeText(row.doc_content.decode('utf-8'))
                    f.write(u'{}\t{}\n'.format(doc_id, doc_content).encode('utf-8', 'ignore'))

        rows = self.db().select(self.db.corpus.doc_id, self.db.corpus.doc_content, orderby=self.db.corpus.doc_index)
        WriteFile(rows)

    def ExportToSpreadsheet(self, filename, id_key = None, content_key = None, is_csv = False):
        """
        filename = A tab- or comma-separated spreadsheet (utf-8 encoded, with header) containing the text corpus and all metadata
        id_key = Header to use for the document ID column (defaults to 'doc_id')
        content_key = Header to use for the document content column (defaults to 'doc_content')
        is_csv = Write comma-separated output (via UnicodeWriter) instead of tab-separated
        """
        field_names = [row.field_name for row in self.db().select(self.db.fields.field_name, orderby=self.db.fields.field_index)]
        field_count = len(field_names)
        all_field_names = [
            id_key if id_key is not None else 'doc_id',
            content_key if content_key is not None else 'doc_content'
        ] + field_names

        def GetRecords(rows):
            # One output record per document: id, content, then one cell per metadata field
            # (empty when the document has no value for that field).
            for row in rows:
                doc_index = row.doc_index
                doc_id = row.doc_id
                doc_content = self.SanitizeText(row.doc_content.decode('utf-8'))
                values = [u''] * field_count
                for d in self.db(self.db.metadata.doc_index == doc_index).select(self.db.metadata.field_index, self.db.metadata.value):
                    values[d.field_index] = self.SanitizeText(d.value.decode('utf-8'))
                yield [doc_id, doc_content] + values

        def WriteCSV(rows):
            with open(filename, 'w') as f:
                writer = UnicodeWriter(f)
                writer.writerow(all_field_names)
                for all_values in GetRecords(rows):
                    writer.writerow(all_values)

        def WriteTSV(rows):
            with open(filename, 'w') as f:
                f.write(u'{}\n'.format(u'\t'.join(all_field_names)).encode('utf-8', 'ignore'))
                for all_values in GetRecords(rows):
                    f.write(u'{}\n'.format(u'\t'.join(all_values)).encode('utf-8', 'ignore'))

        rows = self.db().select(self.db.corpus.doc_index, self.db.corpus.doc_id, self.db.corpus.doc_content, orderby=self.db.corpus.doc_index)
        if is_csv:
            WriteCSV(rows)
        else:
            WriteTSV(rows)
http://www.nervatura.com Copyright © 2011-2015, Csaba Kappel License: LGPLv3 http://www.nervatura.com/nerva2py/default/licenses """ if 0: from gluon.globals import Session global session session = Session() global request request = globals.Request() import gluon.languages.translator as T from gluon.sql import DAL global db db = DAL() global response response = globals.Response() import pyamf from pyamf.flex import ArrayCollection from gluon.tools import Service from nerva2py.nervastore import NervaStore from nerva2py.tools import NervaTools from nerva2py.npi import Npi import nerva2py.models if request.env.http_origin: response.headers['Access-Control-Allow-Origin'] = request.env.http_origin else:
http://www.nervatura.com/nerva2py/default/licenses """ if 0: global response response = globals.Response() global request request = globals.Request() from gluon.globals import Session global session session = Session() from gluon.sql import DAL global db db = DAL() import gluon.languages.translator as T from db import DEMO_MODE from gluon.http import redirect from gluon.sqlhtml import DIV, SPAN, A, INPUT, FORM from gluon.html import BR, HR, SELECT, OPTION, P, IMG, TABLE, TR, TD from gluon.validators import IS_NOT_EMPTY from gluon.html import URL from gluon.storage import Storage import os from gluon.sqlhtml import SQLFORM from gluon.validators import IS_IN_DB, IS_IN_SET, IS_EMPTY_OR from gluon.html import TBODY, THEAD, TH, TEXTAREA, CODE from gluon.sql import Field