import logging
import sys
import threading

import django.db.utils

# BatchQueue and expertsrc_pb2 are project-local modules and are assumed to be
# importable from the surrounding package.

logger = logging.getLogger(__name__)


class QueueWatcher(threading.Thread):
    """Thread that drains one batch queue and dispatches each dequeued batch
    to the importer registered for its type."""

    queue_name = None
    types = None
    logger = None
    batch_obj = None
    batch_queue = None

    def __init__(self, queue_name, types, batch_obj, error_queue):
        threading.Thread.__init__(self)
        self.queue_name = queue_name
        # Mapping of batch type -> importer object exposing import_batch().
        self.types = types
        self.batch_obj = batch_obj
        self.batch_queue = BatchQueue(queue_name, batch_obj)
        self.error_queue = error_queue

    def run(self):
        logger.info("Starting watcher on queue: %s" % self.queue_name)
        while True:
            try:
                while True:
                    # dequeue() is expected to populate self.batch_obj with
                    # the next batch pulled from the queue.
                    self.batch_queue.dequeue()
                    logger.info("Dequeuing %s batch." % self.queue_name)
                    for key in self.types:
                        if self.batch_obj.type == key:
                            self.types[key].import_batch(self.batch_obj)
                            break
            except django.db.utils.IntegrityError:
                logger.warning("Tried to create a dup in queue: %s. Ignoring."
                               % self.queue_name)
            except:
                # Hand the failure to the parent via the error queue, then
                # re-raise so the thread dies loudly.
                logger.debug("Problem in %s queue:" % self.queue_name,
                             exc_info=sys.exc_info())
                self.error_queue.put(sys.exc_info())
                raise
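# --- Usage sketch (not from the original source) -----------------------------
# A hedged example of how a QueueWatcher might be started. The importer object
# and its registration under the batch type are hypothetical; the only
# assumptions taken from the code above are the constructor signature and that
# each importer exposes an import_batch() method.
#
#   from Queue import Queue          # `queue` on Python 3
#
#   error_queue = Queue()
#   batch_obj = expertsrc_pb2.QuestionBatch()
#   importers = {expertsrc_pb2.QuestionBatch.SCHEMAMAP: schema_map_importer}
#   watcher = QueueWatcher('question', importers, batch_obj, error_queue)
#   watcher.daemon = True            # don't block interpreter shutdown
#   watcher.start()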
    def register_schema_map(self, sourceid):
        cur = self.conn.cursor()
        mappings = self.get_field_mappings_by_source(sourceid, only_unmapped=True)
        cmd = "SELECT id, name FROM global_attributes"
        cur.execute(cmd)
        global_attributes = self.dictfetchall(cur)
        batch_obj = expertsrc_pb2.QuestionBatch()
        batch_obj.type = expertsrc_pb2.QuestionBatch.SCHEMAMAP
        # TODO: make sure to grab this from auth service using provided cookie
        batch_obj.asker_name = 'data-tamer'
        cmd = """SELECT local_id, value
                   FROM local_sources ls, local_source_meta lsm
                  WHERE ls.id = %s
                    AND lsm.source_id = ls.id
                    AND lsm.meta_name = 'expertsrc_domain'"""
        cur.execute(cmd, (sourceid,))
        source_name, domain_name = cur.fetchone()
        logger.info('source_name -> %s' % source_name)
        logger.info('domain_name -> %s' % domain_name)
        batch_obj.source_name = source_name
        batch = BatchQueue('question', batch_obj)
        # One question per unmapped local field, with candidate matches
        # attached as choices.
        for fid in mappings.keys():
            question = batch.getbatchobj().question.add()
            question.domain_name = domain_name
            question.local_field_id = fid
            question.local_field_name = mappings[fid]['name']
            choices = mappings[fid]['matches']
            ids = list()
            # Offer at most the first ten candidate matches as scored choices.
            choice_count = 10
            for c in choices:
                if choice_count > 0:
                    choice = question.choice.add()
                    choice.global_attribute_id = c['id']
                    choice.global_attribute_name = c['name']
                    choice.confidence_score = c['score']
                    ids.append(c['id'])
                    choice_count -= 1
            # Add every remaining global attribute as an unscored choice;
            # comment this block out to offer only the top matches above.
            id_set = set(ids)
            for a in global_attributes:
                if a['id'] not in id_set:
                    choice = question.choice.add()
                    choice.global_attribute_id = a['id']
                    choice.global_attribute_name = a['name']
        batch.enqueue()
        self.conn.commit()
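    # --- Usage sketch (not from the original source) -------------------------
    # register_schema_map() builds one QuestionBatch for a source's unmapped
    # fields and pushes it onto the 'question' queue for expert review. A
    # hedged call, assuming `store` is an instance of this class with an open
    # database connection:
    #
    #   store.register_schema_map(sourceid=42)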
    def create_mappings(self, pairs, anti=False, answerer_id=0, answerer_auth=0.5):
        if len(pairs) == 0:
            return
        cur = self.conn.cursor()
        params = []
        batch_obj = expertsrc_pb2.AnswerBatch()
        batch_obj.type = expertsrc_pb2.AnswerBatch.SCHEMAMAP
        batch = BatchQueue('answer', batch_obj)
        cmd = 'INSERT INTO attribute_mappings (' \
              ' local_id, global_id, confidence, authority, who_created, ' \
              ' when_created, why_created) ' \
              'VALUES '
        if anti:
            # Negative answers go to the antimappings table instead.
            cmd = cmd.replace('mappings', 'antimappings')
        for local_id, global_id, conf in pairs:
            # Append one parameterized VALUES tuple per pair; the confidence
            # is a 0-100 score divided down to the 0.0-1.0 range.
            cmd = cmd + '(%s, %s, %s, %s, %s, NOW(), %s), '
            params.append(local_id)
            params.append(global_id)
            params.append(int(conf) / 100.0)
            params.append(answerer_auth)
            params.append(answerer_id)
            params.append("Expertsrc")
            # Register the answer with expertsrc as well.
            answer = batch.getbatchobj().answer.add()
            answer.answerer_id = answerer_id
            answer.confidence = int(conf) / 100.0
            answer.authority = answerer_auth
            answer.global_attribute_id = int(global_id)
            answer.local_field_id = int(local_id)
            answer.is_match = not anti
        batch.enqueue()
        # Drop the trailing ', ' and terminate the statement.
        cmd = cmd[0:-2] + ';'
        cur.execute(cmd, params)
        self.conn.commit()
        return str([cmd, params])
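    # --- Usage sketch (not from the original source) -------------------------
    # create_mappings() takes (local_id, global_id, confidence) triples, where
    # confidence is a 0-100 score that is divided by 100.0 before being written
    # to attribute_mappings and echoed onto the 'answer' queue. A hedged
    # example, again assuming a `store` instance of this class:
    #
    #   pairs = [(12, 3, 95), (14, 7, 60)]
    #   store.create_mappings(pairs, anti=False, answerer_id=1,
    #                         answerer_auth=0.8)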