def insert_the_statements(self, input_tuples):
    """Copy raw statement rows into the test database, then insert agents.

    Parameters
    ----------
    input_tuples : list of tuples
        Rows destined for the ``raw_statements`` table. The first element of
        every row is dropped before copying, as is the first column name
        (presumably a database-assigned id -- TODO confirm).
    """
    print("Loading %d statements..." % len(input_tuples))

    # Strip the leading element from each row and the matching column name.
    trimmed_rows = [row[1:] for row in input_tuples]
    trimmed_cols = self.test_data['raw_statements']['cols'][1:]
    self.test_db.copy('raw_statements', trimmed_rows, trimmed_cols)

    print("Inserting agents...")
    dbu.insert_agents(self.test_db, 'raw')
def upload_statements(stmt_data_list, db=None):
    """Copy statements into the ``statements`` table and insert their agents.

    Parameters
    ----------
    stmt_data_list : list
        Items exposing ``make_tuple()``, ``reading_id`` and ``statement``.
    db : database manager or None
        The database to write to. When None, the result of
        ``get_primary_db()`` is used.
    """
    if db is None:
        db = get_primary_db()

    num_stmts = len(stmt_data_list)
    logger.info("Uploading %d statements to the database." % num_stmts)
    rows = [stmt_data.make_tuple() for stmt_data in stmt_data_list]
    db.copy('statements', rows, StatementData.get_cols())

    logger.info("Uploading agents to the database.")
    reading_ids = {stmt_data.reading_id for stmt_data in stmt_data_list}
    if not reading_ids:
        # Nothing was uploaded, so there are no agents to insert.
        return

    stmts = [stmt_data.statement for stmt_data in stmt_data_list]
    # Restrict agent insertion to statements that came from these readings.
    insert_agents(db, stmts, db.Statements.reader_ref.in_(reading_ids))
def upload_statements(stmt_data_list, db=None):
    """Copy statements into ``raw_statements`` and insert their agents.

    Parameters
    ----------
    stmt_data_list : list
        Items exposing ``make_tuple()``, ``reading_id`` and ``uuid``.
    db : database manager or None
        The database to write to. When None, the result of
        ``get_primary_db()`` is used.
    """
    if db is None:
        db = get_primary_db()

    num_stmts = len(stmt_data_list)
    logger.info("Uploading %d statements to the database." % num_stmts)
    rows = [stmt_data.make_tuple() for stmt_data in stmt_data_list]
    db.copy('raw_statements', rows, StatementData.get_cols())

    logger.info("Uploading agents to the database.")
    reading_ids = {stmt_data.reading_id for stmt_data in stmt_data_list}
    if not reading_ids:
        # Nothing was uploaded, so there are no agents to insert.
        return

    # Lazily look up each freshly copied row by uuid; one query is issued per
    # statement, and only as insert_agents consumes the generator.
    db_stmts = (
        db.select_one(db.RawStatements,
                      db.RawStatements.uuid.like(stmt_data.uuid))
        for stmt_data in stmt_data_list
    )
    insert_agents(db, 'raw', db_stmts, verbose=True)
def add_statements(self, fraction=1, with_pa=False):
    """Add statements and agents to the database.

    Parameters
    ----------
    fraction : float between 0 and 1
        Default is 1. The fraction of remaining statements to be added. The
        resulting number of statements is truncated to an integer, and the
        statements are chosen at random. A value equal to 1 (int or float)
        adds all remaining statements without sampling.
    with_pa : bool
        Default False. Choose to run pre-assembly/incremental-preassembly
        on the added statements.
    """
    available_tuples = self.get_available_stmt_tuples()

    # Compare by value, not identity: `fraction is not 1` is True for 1.0
    # and relies on an implementation detail of small-int caching.
    if fraction != 1:
        # random.sample requires an integer sample size.
        num_stmts = int(fraction * len(available_tuples))
        input_tuples = random.sample(available_tuples, num_stmts)
    else:
        input_tuples = available_tuples

    print("Loading %d statements..." % len(input_tuples))
    if hasattr(self.test_db.RawStatements, 'id'):
        self.test_db.copy('raw_statements', input_tuples,
                          self.test_data['raw_statements']['cols'])
    else:
        # The table has no `id` attribute, so drop the leading element of
        # each row along with the leading column name.
        self.test_db.copy('raw_statements', [t[1:] for t in input_tuples],
                          self.test_data['raw_statements']['cols'][1:])

    print("Inserting agents...")
    db_util.insert_agents(self.test_db, 'raw')

    if with_pa:
        print("Preassembling new statements...")
        if len(input_tuples) > 100:
            # For larger loads, process in batches of a tenth of the input.
            batch_size = len(input_tuples)//10
            pam = pm.PreassemblyManager(1, batch_size)
        else:
            pam = pm.PreassemblyManager()

        # If statements were already used, extend the existing corpus;
        # otherwise build it from scratch.
        if self.used_stmt_tuples:
            pam.supplement_corpus(self.test_db)
        else:
            pam.create_corpus(self.test_db)

    return
def upload_statements(stmt_data_list, db=None):
    """Copy statements into ``raw_statements`` and insert their agents.

    Parameters
    ----------
    stmt_data_list : list
        Items exposing ``make_tuple()``, ``reading_id`` and ``statement``
        (whose ``uuid`` is used to select the uploaded rows).
    db : database manager or None
        The database to write to. When None, the result of
        ``get_primary_db()`` is used.
    """
    if db is None:
        db = get_primary_db()

    num_stmts = len(stmt_data_list)
    logger.info("Uploading %d statements to the database." % num_stmts)
    rows = [stmt_data.make_tuple() for stmt_data in stmt_data_list]
    db.copy('raw_statements', rows, StatementData.get_cols())

    logger.info("Uploading agents to the database.")
    reading_ids = {stmt_data.reading_id for stmt_data in stmt_data_list}
    if not reading_ids:
        # Nothing was uploaded, so there are no agents to insert.
        return

    # Select exactly the rows just uploaded, by statement uuid.
    uploaded_uuids = {stmt_data.statement.uuid for stmt_data in stmt_data_list}
    uuid_clause = db.RawStatements.uuid.in_(uploaded_uuids)
    insert_agents(db, 'raw', uuid_clause, verbose=True,
                  override_default_query=True)
def _get_loaded_db(num_stmts, batch_size=None, split=None,
                   with_init_corpus=False):
    """Create a test database filled with raw statements and their agents.

    Parameters
    ----------
    num_stmts : int
        Passed to ``_get_input_stmt_tuples`` to obtain the statement rows.
    batch_size : int or None
        Batch size handed to the PreassemblyManager when `split` is given.
        Must not be None in that case.
    split : float or None
        When None, all rows are loaded at once. Otherwise the fraction of
        rows loaded as an initial batch, dated two days in the past; the
        remaining rows are loaded afterwards, dated now.
    with_init_corpus : bool
        If True, create a preassembled corpus after the first load.

    Returns
    -------
    db : the loaded test database.
    """
    print("Creating and filling a test database:")
    db = _get_background_loaded_db()

    # Load the statements. NOTE: much of this processing is the result of
    # active development. TODO: format the pickle to match (the original
    # note was left unfinished).
    stmt_rows, col_names = _get_input_stmt_tuples(num_stmts)

    print("\tInserting the raw statements...")

    if split is None:
        # Simple case: everything goes in as a single load.
        db.copy('raw_statements', stmt_rows, col_names)
        print("\tAdding agents...")
        db_util.insert_agents(db, 'raw')
        if with_init_corpus:
            print("\tAdding a preassembled corpus...")
            corpus_manager = pm.PreassemblyManager()
            corpus_manager.create_corpus(db)
        return db

    assert batch_size is not None

    def _copy_with_create_date(rows, create_date):
        # Append an explicit create_date to every row and to the column list.
        db.copy('raw_statements',
                [row + (create_date, ) for row in rows],
                col_names + ('create_date', ))

    # Randomly pick the initial batch; the remainder is found by set
    # difference, so its order is arbitrary and duplicate rows collapse.
    initial_count = int(split * len(stmt_rows))
    initial_rows = random.sample(stmt_rows, initial_count)
    remaining_rows = list(set(stmt_rows) - set(initial_rows))

    _copy_with_create_date(initial_rows, datetime.now() - timedelta(days=2))
    print("\tAdding agents...")
    db_util.insert_agents(db, 'raw')

    if with_init_corpus:
        print("\tAdding a preassembled corpus from first batch of raw stmts...")
        corpus_manager = pm.PreassemblyManager(batch_size=batch_size)
        corpus_manager.create_corpus(db)

    print("\tInserting the rest of the raw statements...")
    _copy_with_create_date(remaining_rows, datetime.now())
    print("\tAdding agents...")
    db_util.insert_agents(db, 'raw')

    return db