def update(self, old_info, update_info):
    """
    Replace an info/vuln instance stored in the kb with a newer version.

    The row is located using the unique id of ``old_info``; both its pickle
    and its unique id are then overwritten with the ones obtained from
    ``update_info``. Observers are notified only when a row was changed.

    :param old_info: The info/vuln instance to be updated in the kb.
    :param update_info: The info/vuln instance with new information
    :return: Nothing
    :raise TypeError: When either argument is not an Info, InfoSet or Shell
                      instance.
    :raise DBException: When the UPDATE statement reports no affected rows.
    """
    storable_types = (Info, InfoSet, Shell)

    for candidate in (old_info, update_info):
        if not isinstance(candidate, storable_types):
            msg = ('You MUST use raw_write/raw_read to store non-info objects'
                   ' to the KnowledgeBase.')
            raise TypeError(msg)

    old_uniq_id = old_info.get_uniq_id()
    new_uniq_id = update_info.get_uniq_id()
    serialized = cpickle_dumps(update_info)

    # Find the row by the original uniq_id, then overwrite both the pickle
    # and the uniq_id columns
    statement = "UPDATE %s SET pickle = ?, uniq_id = ? WHERE uniq_id = ?"
    bindings = (serialized, new_uniq_id, old_uniq_id)
    outcome = self.db.execute(statement % self.table_name, bindings).result()

    if not outcome.rowcount:
        template = ('Failed to update() %s instance because'
                    ' the original unique_id (%s) does not exist in the DB,'
                    ' or the new unique_id (%s) is invalid.')
        details = (old_info.__class__.__name__, old_uniq_id, new_uniq_id)
        raise DBException(template % details)

    self._notify_observers(self.UPDATE, old_info, update_info)
def find(self, searchData, result_limit=-1, orderData=None, full=False):
    """
    Make complex search.

    search_data = {name: (value, operator), ...}
    orderData = [(name, direction)]

    :param searchData: Search criteria, handed to WhereHelper which builds
                       the WHERE clause and its bound values.
    :param result_limit: Value appended to the LIMIT clause (default: -1).
    :param orderData: Optional iterable of (name, direction) pairs used to
                      build the ORDER BY clause. None means "no ordering".
    :param full: Forwarded unchanged to _load_from_row() for every row.
    :return: A list with one instance of this class per matching row.
    :raise DBException: When the select (or loading a row) fails with a
                        DBException.

    NOTE: the names and directions in orderData, as well as result_limit,
    are concatenated into the SQL text as-is; only the WHERE values are
    passed as bound parameters.
    """
    sql = 'SELECT * FROM ' + self._DATA_TABLE
    where = WhereHelper(searchData)
    sql += where.sql()

    #
    #   TODO we need to move SQL code to parent class
    #
    # None replaces the former mutable [] default argument
    orderby = ','.join(item[0] + ' ' + item[1] for item in orderData or [])

    if orderby:
        sql += ' ORDER BY ' + orderby

    sql += ' LIMIT ' + str(result_limit)

    result = []

    try:
        for row in self._db.select(sql, where.values()):
            item = self.__class__()
            item._load_from_row(row, full)
            result.append(item)
    except DBException:
        msg = 'You performed an invalid search. Please verify your syntax.'
        raise DBException(msg)

    return result
def _load_from_trace_file_concurrent(self, _id):
    """
    Read a request/response pair from its trace file, retrying for a
    bounded number of attempts so that reads racing with the writer get
    another chance.

    :param _id: The request-response ID
    :return: A tuple containing request and response instances
    :raise DBException: When every attempt ended in a TraceReadException
    """
    wait_time = 0.05
    max_attempts = int(1 / wait_time)
    attempts_done = 0

    #
    # Each failed attempt is logged at debug level and followed by a
    # short sleep before trying again
    #
    while attempts_done < max_attempts:
        attempts_done += 1

        try:
            return self._load_from_trace_file(_id)
        except TraceReadException as e:
            debug_msg = 'Failed to read trace file %s: "%s"'
            om.out.debug(debug_msg % (_id, e))

            time.sleep(wait_time)

    # All the attempts failed
    timeout_msg = 'Timeout expecting trace file "%s" to be ready'
    file_name = self._get_trace_filename_for_id(_id)
    raise DBException(timeout_msg % file_name)
def load(self, _id=None, full=True, retry=True):
    """
    Load data from DB by ID.

    :param _id: The id to search for. Any falsy value (None, 0, '') is
                replaced by self.id.
    :param full: Not read by the statements visible in this block.
    :param retry: Not read by the statements visible in this block.
    :raise DBException: When the select fails; the id, the table name and
                        the original exception are included in the message.

    NOTE(review): only the row lookup is visible here. The fetched row is
    not consumed and nothing is returned by this span; confirm against the
    complete definition before relying on this description.
    """
    if not _id:
        _id = self.id

    sql = 'SELECT * FROM ' + self._DATA_TABLE + ' WHERE id = ? '

    try:
        row = self._db.select_one(sql, (_id, ))
    except DBException as dbe:
        msg = 'An unexpected error occurred while searching for id "%s"'\
              ' in table "%s". Original exception: "%s".'
        raise DBException(msg % (_id, self._DATA_TABLE, dbe))
def load_from_file(self, _id):
    """
    Load a request/response pair from the gzip-compressed msgpack trace
    file associated with an id.

    The read is retried a bounded number of times, sleeping WAIT_TIME
    between attempts, while the file does not exist yet or its content
    is not a complete (request, response, canary) payload.

    :param _id: The request-response ID
    :return: A tuple containing request and response instances
    :raise DBException: When no attempt produced a valid payload
    """
    fname = self._get_fname_for_id(_id)
    WAIT_TIME = 0.05

    #
    #    Due to some concurrency issues, we need to perform these checks
    #
    for _ in xrange(int(1 / WAIT_TIME)):
        if not os.path.exists(fname):
            time.sleep(WAIT_TIME)
            continue

        # Ok... the file exists, but it might still be being written
        req_res = gzip.open(fname, 'rb', compresslevel=self.COMPRESSION_LEVEL)

        try:
            data = msgpack.load(req_res, use_list=True)
        except ValueError:
            # ValueError: Extra data. returned when msgpack finds invalid
            # data in the file. Leaving data as None makes the unpacking
            # below fail, which triggers the retry.
            data = None
        finally:
            # Close the handle on every path, including exceptions which
            # are not handled here and propagate to the caller
            req_res.close()

        try:
            request_dict, response_dict, canary = data
        except (TypeError, ValueError):
            # TypeError: https://github.com/andresriancho/w3af/issues/1101
            # 'NoneType' object is not iterable
            #
            # ValueError: the payload is a sequence, but it does not hold
            # exactly three items
            time.sleep(WAIT_TIME)
            continue

        if canary != self._MSGPACK_CANARY:
            # read failed, most likely because the file write is not
            # complete but for some reason it was a valid msgpack file
            time.sleep(WAIT_TIME)
            continue

        # Success!
        request = HTTPRequest.from_dict(request_dict)
        response = HTTPResponse.from_dict(response_dict)
        return request, response

    msg = 'Timeout expecting trace file to be ready "%s"' % fname
    raise DBException(msg)
def run(self):
    """
    This is the "main" method for this class, the one that
    consumes the commands which are sent to the Queue. The idea is to have
    the following architecture features:
        * Other parts of the framework which want to insert into the DB
          simply add an item to our input Queue and "forget about it"
          since it will be processed in another thread.

        * Only one thread accesses the sqlite3 object, which avoids many
          issues because of sqlite's non thread-safeness

    The Queue.get() will make sure we don't have 100% CPU usage in the loop

    Each queue item is an (op_code, args, kwds, future) tuple. The handler
    registered for op_code is called with args/kwds and its outcome is
    stored in the future: the return value on success, a DBException
    wrapping the original error message on failure.

    The loop ends when the POISON op code is received. The method returns
    right away when set_running_or_notify_cancel() reports that the future
    can not be run.
    """
    OP_CODES = {
        SETUP: self._setup_handler,
        QUERY: self._query_handler,
        SELECT: self._select_handler,
        COMMIT: self._commit_handler,
        POISON: POISON
    }

    while True:
        op_code, args, kwds, future = self._in_queue.get()

        args = args or ()
        kwds = kwds or {}

        if self.DEBUG:
            print('%s %s %s' % (op_code, args, kwds))

        handler = OP_CODES.get(op_code, None)

        if handler is None:
            # Invalid OPCODE, the future is left untouched
            continue

        if handler == POISON:
            # The future is not touched for the poison pill either
            break

        if not future.set_running_or_notify_cancel():
            return

        try:
            result = handler(*args, **kwds)
        except Exception as e:
            dbe = DBException(str(e))
            future.set_exception(dbe)
        else:
            future.set_result(result)
if retry: # TODO: # According to sqlite3 documentation this db.commit() # might fix errors like # https://sourceforge.net/apps/trac/w3af/ticket/164352 , # but it can degrade performance due to disk IO # self._db.commit() self.load(_id=_id, full=full, retry=False) else: # This is the second time load() is called and we end up # here, raise an exception and finish our pain. msg = ('An internal error occurred while searching for ' 'id "%s", even after commit/retry' % _id) raise DBException(msg) return True @verify_has_db def read(self, _id, full=True): """Return item by ID.""" result_item = self.__class__() result_item.load(_id, full) return result_item def save(self): """Save object into DB.""" resp = self.response code = int(resp.get_code()) / 100
def run(self):
    """
    This is the "main" method for this class, the one that
    consumes the commands which are sent to the Queue. The idea is to have
    the following architecture features:
        * Other parts of the framework which want to insert into the DB
          simply add an item to our input Queue and "forget about it"
          since it will be processed in another thread.

        * Only one thread accesses the sqlite3 object, which avoids many
          issues because of sqlite's non thread-safeness

    The Queue.get() will make sure we don't have 100% CPU usage in the loop

    Each queue item is an (op_code, args, kwds, future) tuple and every
    future which was successfully marked as running gets an answer:
        * False for an unknown op code
        * True for the POISON op code, which also ends the loop
        * The handler's return value when the handler succeeds
        * A DBException (or one of the more specific exceptions chosen
          below for sqlite3.OperationalError) when the handler fails
    """
    OP_CODES = {
        SETUP: self._setup_handler,
        QUERY: self._query_handler,
        SELECT: self._select_handler,
        COMMIT: self._commit_handler,
        POISON: POISON
    }

    while True:
        op_code, args, kwds, future = self._in_queue.get()
        self._current_query_num += 1

        args = args or ()
        kwds = kwds or {}

        self._report_qsize_limit_reached()

        if self.DEBUG:
            self._report_qsize()

        handler = OP_CODES.get(op_code, None)

        # NOTE(review): this return ends the whole consumer loop, not only
        # the processing of the current item -- confirm this is intended
        if not future.set_running_or_notify_cancel():
            return

        if handler is None:
            # Invalid OPCODE
            future.set_result(False)
            continue

        if handler == POISON:
            self._poison_pill_received = True
            future.set_result(True)
            break

        try:
            result = handler(*args, **kwds)
        except sqlite3.OperationalError as e:
            # I don't like this string match, but it seems that the
            # exception doesn't have any error code to match
            error = str(e)

            if 'no such table' in error:
                dbe = NoSuchTableException(error)
            elif 'malformed' in error:
                print(DB_MALFORMED_ERROR)
                dbe = MalformedDBException(DB_MALFORMED_ERROR)
            else:
                # More specific exceptions to be added here later...
                dbe = DBException(error)

            future.set_exception(dbe)
        except Exception as e:
            dbe = DBException(str(e))
            future.set_exception(dbe)
        else:
            # Deliver the handler's return value, otherwise whoever waits
            # on this future never gets an answer for a successful call
            future.set_result(result)
class HistoryItem(object):
    """
    Represents history item

    The attributes of an item are read from / deleted in the table named
    by _DATA_TABLE. The serialized request/response pair of an item is
    read from a "<id>.trace" file stored in the session directory, or from
    one of the zip files which live in that same directory.
    """

    _db = None
    _DATA_TABLE = 'history_items'
    _COLUMNS = [
        ('id', 'INTEGER'), ('url', 'TEXT'), ('code', 'INTEGER'),
        ('tag', 'TEXT'), ('mark', 'INTEGER'), ('info', 'TEXT'),
        ('time', 'FLOAT'), ('msg', 'TEXT'), ('content_type', 'TEXT'),
        ('charset', 'TEXT'), ('method', 'TEXT'), ('response_size', 'INTEGER'),
        ('codef', 'INTEGER'), ('alias', 'TEXT'), ('has_qs', 'INTEGER')
    ]
    _PRIMARY_KEY_COLUMNS = ('id',)
    _INDEX_COLUMNS = ('alias',)

    # Extension of the uncompressed trace files
    _EXTENSION = 'trace'
    # Last item of every serialized payload, used to detect partial writes
    _MSGPACK_CANARY = 'cute-and-yellow'

    # Only files ending with this extension are searched for zipped traces
    _COMPRESSED_EXTENSION = 'zip'
    _COMPRESSED_FILE_BATCH = 150
    _UNCOMPRESSED_FILES = 50
    _COMPRESSION_LEVEL = 7
    _MIN_FILE_COUNT = _COMPRESSED_FILE_BATCH + _UNCOMPRESSED_FILES

    # Class level attributes, shared by all the instances
    _pending_compression_jobs = []
    _latest_compression_job_end = 0

    # Default values for the item attributes
    id = None
    url = None
    _request = None
    _response = None
    info = None
    mark = False
    tag = ''
    content_type = ''
    response_size = 0
    method = 'GET'
    msg = 'OK'
    code = 200
    time = 0.2
    charset = None

    history_lock = threading.RLock()
    compression_lock = threading.RLock()

    def __init__(self):
        """
        Bind the instance to the default temp DB and compute the path of
        the directory which holds the traces for this session.
        """
        self._db = get_default_temp_db_instance()
        self._session_dir = os.path.join(get_temp_dir(),
                                         self._db.get_file_name() + '_traces')

    def get_session_dir(self):
        """
        :return: The path of the directory where the traces are stored
        """
        return self._session_dir

    def init(self):
        """
        Create the traces directory and the DB table, when they are missing.
        """
        self.init_traces_dir()
        self.init_db()

    def init_traces_dir(self):
        """
        Create the session directory if it does not exist yet.
        """
        with self.history_lock:
            if not os.path.exists(self._session_dir):
                os.mkdir(self._session_dir)

    def init_db(self):
        """
        Init history table and indexes.
        """
        with self.history_lock:
            tablename = self.get_table_name()

            if not self._db.table_exists(tablename):
                pk_cols = self.get_primary_key_columns()
                idx_cols = self.get_index_columns()

                # Wait for each operation to finish before moving on
                self._db.create_table(tablename,
                                      self.get_columns(),
                                      pk_cols).result()
                self._db.create_index(tablename, idx_cols).result()

    def get_response(self):
        """
        :return: The response for this item. When it was not set yet and
                 the item has an id, both the request and the response
                 are loaded from disk and remembered.
        """
        resp = self._response
        if not resp and self.id:
            self._request, resp = self.load_from_file(self.id)
            self._response = resp
        return resp

    def set_response(self, resp):
        """
        :param resp: The response to store in this item
        """
        self._response = resp

    response = property(get_response, set_response)

    def get_request(self):
        """
        :return: The request for this item. When it was not set yet and
                 the item has an id, both the request and the response
                 are loaded from disk and remembered.
        """
        req = self._request
        if not req and self.id:
            req, self._response = self.load_from_file(self.id)
            self._request = req
        return req

    def set_request(self, req):
        """
        :param req: The request to store in this item
        """
        self._request = req

    request = property(get_request, set_request)

    @verify_has_db
    def find(self, search_data, result_limit=-1, order_data=None):
        """
        Make complex search.

        search_data = {name: (value, operator), ...}
        order_data = [(name, direction)]

        :param search_data: Search criteria, handed to WhereHelper which
                            builds the WHERE clause and its bound values.
        :param result_limit: Value appended to the LIMIT clause
        :param order_data: Optional (name, direction) pairs for ORDER BY
        :return: A list with one instance of this class per matching row
        :raise DBException: When the search fails with a DBException

        NOTE: the names and directions in order_data, as well as
        result_limit, are concatenated into the SQL text as-is; only the
        WHERE values are passed as bound parameters.
        """
        order_data = order_data or []
        result = []

        sql = 'SELECT * FROM ' + self._DATA_TABLE
        where = WhereHelper(search_data)
        sql += where.sql()

        #
        #   TODO we need to move SQL code to parent class
        #
        order_by = ','.join(item[0] + ' ' + item[1] for item in order_data)

        if order_by:
            sql += ' ORDER BY ' + order_by

        sql += ' LIMIT ' + str(result_limit)

        try:
            for row in self._db.select(sql, where.values()):
                item = self.__class__()
                item._load_from_row(row)
                result.append(item)
        except DBException:
            msg = 'You performed an invalid search. Please verify your syntax.'
            raise DBException(msg)

        return result

    def _load_from_row(self, row):
        """
        Load data from row with all columns

        The values are read by position. Only the first twelve positions
        are used, anything after them in the row is ignored.

        :param row: An indexable row. Presumably its positions follow the
                    order of _COLUMNS -- TODO confirm against the writer.
        """
        self.id = row[0]
        self.url = row[1]
        self.code = row[2]
        self.tag = row[3]
        self.mark = bool(row[4])
        self.info = row[5]
        self.time = float(row[6])
        self.msg = row[7]
        self.content_type = row[8]
        self.charset = row[9]
        self.method = row[10]
        self.response_size = int(row[11])

    def _get_trace_filename_for_id(self, _id):
        """
        :param _id: The request-response ID
        :return: The path of the uncompressed trace file for this id
        """
        return os.path.join(self._session_dir,
                            '%s.%s' % (_id, self._EXTENSION))

    def _load_from_trace_file(self, _id):
        """
        Load a request/response from a trace file on disk. This is the
        simplest implementation, without any retries for concurrency issues.

        :param _id: The request-response ID
        :return: A tuple containing request and response instances
        :raise TraceReadException: When the file does not exist or its
                                   content can not be loaded
        """
        file_name = self._get_trace_filename_for_id(_id)

        if not os.path.exists(file_name):
            raise TraceReadException('Trace file %s does not exist' % file_name)

        # The file exists, but the contents might not be all on-disk yet.
        # The context manager makes sure the file handle is closed.
        with open(file_name, 'rb') as trace_file:
            serialized_req_res = trace_file.read()

        return self._load_from_string(serialized_req_res)

    def _load_from_string(self, serialized_req_res):
        """
        Deserialize a msgpack payload holding (request, response, canary).

        :param serialized_req_res: The raw content of a trace
        :return: A tuple containing request and response instances
        :raise TraceReadException: When the payload is not valid msgpack,
                                   does not hold exactly three items or
                                   its canary is not the expected one
        """
        try:
            data = msgpack.loads(serialized_req_res, use_list=True)
        except ValueError:
            # ValueError: Extra data. returned when msgpack finds invalid
            # data in the file
            raise TraceReadException('Failed to load %s' % serialized_req_res)

        try:
            request_dict, response_dict, canary = data
        except (TypeError, ValueError):
            # TypeError: https://github.com/andresriancho/w3af/issues/1101
            # 'NoneType' object is not iterable
            #
            # ValueError: the payload is a sequence, but it does not hold
            # exactly three items
            raise TraceReadException('Not all components found in %s' % serialized_req_res)

        if not canary == self._MSGPACK_CANARY:
            # read failed, most likely because the file write is not
            # complete but for some reason it was a valid msgpack file
            raise TraceReadException('Invalid canary in %s' % serialized_req_res)

        request = HTTPRequest.from_dict(request_dict)
        response = HTTPResponse.from_dict(response_dict)
        return request, response

    def _load_from_trace_file_concurrent(self, _id):
        """
        Load a request/response from a trace file on disk, using retries
        and error handling to make sure all concurrency issues are handled.

        :param _id: The request-response ID
        :return: A tuple containing request and response instances
        :raise DBException: When every attempt ended in a TraceReadException
        """
        wait_time = 0.05

        #
        # Retry the read a few times to handle concurrency issues
        #
        for _ in xrange(int(1 / wait_time)):
            try:
                return self._load_from_trace_file(_id)
            except TraceReadException as e:
                args = (_id, e)
                msg = 'Failed to read trace file %s: "%s"'
                om.out.debug(msg % args)

                time.sleep(wait_time)

        # All the attempts failed
        msg = 'Timeout expecting trace file "%s" to be ready'
        file_name = self._get_trace_filename_for_id(_id)
        raise DBException(msg % file_name)

    def load_from_file(self, _id):
        """
        Loads a request/response from a trace file on disk. Two different
        options exist:

            * The file is compressed inside a zip
            * The file is uncompressed in a trace

        :param _id: The request-response ID
        :return: A tuple containing request and response instances
        :raise DBException: When the trace could not be read from the zip
                            files and the trace file reads timed out
        """
        #
        # First we check if the trace file exists and try to load it from
        # the uncompressed trace
        #
        file_name = self._get_trace_filename_for_id(_id)

        if os.path.exists(file_name):
            return self._load_from_trace_file_concurrent(_id)

        #
        # The trace file doesn't exist, try to find the zip file where the
        # compressed file lives and read it from there
        #
        try:
            return self._load_from_zip(_id)
        except TraceReadException as e:
            msg = 'Failed to load trace %s from zip file: "%s"'
            args = (_id, e)
            om.out.debug(msg % args)

            #
            # Give the .trace file a last chance, it might be possible that
            # when we checked for os.path.exists(file_name) at the beginning
            # of this method the file wasn't there yet, but is on disk now
            #
            return self._load_from_trace_file_concurrent(_id)

    def _load_from_zip(self, _id):
        """
        Find the zip file whose id range contains _id and load the trace
        from it.

        :param _id: The request-response ID
        :return: A tuple containing request and response instances
        :raise TraceReadException: When no zip file covers the id, or the
                                   matching zip file can not provide it
        """
        files = os.listdir(self.get_session_dir())
        files = [f for f in files if f.endswith(self._COMPRESSED_EXTENSION)]

        for zip_file in files:
            start, end = get_zip_id_range(zip_file)

            if start <= _id <= end:
                return self._load_from_zip_file(_id, zip_file)

        raise TraceReadException('No zip file contains %s' % _id)

    def _load_from_zip_file(self, _id, zip_file):
        """
        Read and deserialize the trace for _id from one zip file.

        :param _id: The request-response ID
        :param zip_file: Name of a zip file stored in the session directory
        :return: A tuple containing request and response instances
        :raise TraceReadException: When the zip file has no entry for _id
                                   or the entry can not be deserialized
        """
        _zip = zipfile.ZipFile(os.path.join(self.get_session_dir(), zip_file))

        try:
            serialized_req_res = _zip.read('%s.%s' % (_id, self._EXTENSION))
        except KeyError:
            # We get here when the zip file doesn't contain the trace file
            msg = 'Zip file %s does not contain ID %s'
            args = (zip_file, _id)
            raise TraceReadException(msg % args)
        finally:
            # Release the archive's file handle on every path
            _zip.close()

        return self._load_from_string(serialized_req_res)

    @verify_has_db
    def delete(self, _id=None):
        """
        Delete data from DB by ID.

        The uncompressed trace file for the id is removed too; errors
        raised while removing it (OSError) are ignored. The value returned
        by the DB execute() call is not inspected.

        :param _id: The id to delete, defaults to self.id when None
        """
        if _id is None:
            _id = self.id

        sql = 'DELETE FROM ' + self._DATA_TABLE + ' WHERE id = ? '
        self._db.execute(sql, (_id,))

        fname = self._get_trace_filename_for_id(_id)

        try:
            os.remove(fname)
        except OSError:
            pass

    @verify_has_db
    def load(self, _id=None, retry=True):
        """
        Load data from DB by ID

        :param _id: The id to load, defaults to self.id when None
        :param retry: When True and the row is not found, commit and
                      search one more time before giving up
        :return: True when the row was found and loaded
        :raise DBException: When the select fails, or when the row is
                            still missing after the retry
        """
        if _id is None:
            _id = self.id

        sql = 'SELECT * FROM ' + self._DATA_TABLE + ' WHERE id = ? '

        try:
            row = self._db.select_one(sql, (_id,))
        except DBException as dbe:
            msg = ('An unexpected error occurred while searching for id "%s"'
                   ' in table "%s". Original exception: "%s".')
            raise DBException(msg % (_id, self._DATA_TABLE, dbe))

        if row is not None:
            self._load_from_row(row)
            return True

        if not retry:
            #
            # This is the second time load() is called and we end up
            # here, raise an exception and finish our pain.
            #
            msg = ('An internal error occurred while searching for id "%s",'
                   ' even after commit/retry')
            raise DBException(msg % _id)

        #
        # The request/response with _id is not in the DB!
        # Lets do some error handling and try again!
        #
        # According to sqlite3 documentation this db.commit()
        # might fix errors like [0] but it can degrade performance due
        # to disk IO
        #
        # [0] https://sourceforge.net/apps/trac/w3af/ticket/164352 ,
        #
        self._db.commit()
        return self.load(_id=_id, retry=False)