def updateReport(self, report_id, parsed_request):
    log.debug("Got this request %s" % parsed_request)
    try:
        report_path = self.report_dir.child(report_id)
        report_metadata_path = self.report_dir.child(report_id + METADATA_EXT)
    except InsecurePath:
        raise e.OONIBError(406, "Invalid report_id")

    content_format = parsed_request.get('format', 'yaml')
    if content_format == 'json':
        data = json_dumps(parsed_request['content'])
        data += "\n"
    elif content_format == 'yaml':
        data = parsed_request['content']
    else:
        raise e.InvalidFormatField

    if not report_path.exists() or \
            not report_metadata_path.exists():
        raise e.OONIBError(404, "Report not found")

    with report_path.open('a') as fd:
        fd.write(data)

    report_metadata_path.touch()

    self.write({'status': 'success'})
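
# Illustrative only (not part of the handler above): the two update payloads it
# accepts, with made-up content. With 'format': 'json' the content dict is
# serialized to a single line of JSON followed by a newline; with 'yaml' (the
# default) the content string is appended to the report file verbatim.
example_json_update = {'format': 'json',
                       'content': {'input': 'example.org', 'failure': None}}
example_yaml_update = {'format': 'yaml',
                       'content': '---\ninput: example.org\n...\n'}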
def setupCollector(tor_process_protocol):
    def setup_complete(port):
        print("Exposed collector Tor hidden service on httpo://%s"
              % port.onion_uri)

    tempfile.tempdir = os.path.join(_repo_dir, 'tmp')
    if not os.path.isdir(tempfile.gettempdir()):
        os.makedirs(tempfile.gettempdir())
    _temp_dir = tempfile.mkdtemp()

    if config.main.tor_datadir is None:
        log.warn("Option 'tor_datadir' in oonib.conf is unspecified!")
        log.msg("Creating tmp directory in current directory for datadir.")
        log.debug("Using %s" % _temp_dir)
        datadir = _temp_dir
    else:
        datadir = config.main.tor_datadir

    torconfig = TorConfig(tor_process_protocol.tor_protocol)
    public_port = 80
    # XXX there is currently a bug in txtorcon that prevents data_dir from
    # being passed properly. Details on the bug can be found here:
    # https://github.com/meejah/txtorcon/pull/22
    hs_endpoint = TCPHiddenServiceEndpoint(reactor, torconfig, public_port,
                                           data_dir=datadir)

    hidden_service = hs_endpoint.listen(reportingBackend)
    hidden_service.addCallback(setup_complete)
    hidden_service.addErrback(txSetupFailed)
def nextMutation(self):
    log.debug("Moving onto next mutation")
    # [step_idx, mutation_idx]
    c_step_idx, c_mutation_idx = self.factory.mutation
    log.debug("[%s]: c_step_idx: %s | c_mutation_idx: %s"
              % (self.role, c_step_idx, c_mutation_idx))

    if c_step_idx >= (len(self.steps) - 1):
        log.err("No censorship fingerprint bisected.")
        log.err("Giving up.")
        self.transport.loseConnection()
        return

    # This means we have mutated all bytes in the step
    # we should proceed to mutating the next step.
    log.debug("steps: %s | %s" % (self.steps, self.steps[c_step_idx]))
    if c_mutation_idx >= (len(self.steps[c_step_idx].values()[0]) - 1):
        log.debug("Finished mutating step")
        # increase step
        self.factory.mutation[0] += 1
        # reset mutation idx
        self.factory.mutation[1] = 0
    else:
        log.debug("Mutating next byte in step")
        # increase mutation index
        self.factory.mutation[1] += 1
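
# Illustrative only: how the [step_idx, mutation_idx] cursor above walks a
# made-up daphn3 steps list. Each step is a one-key dict mapping a role to the
# payload bytes for that step (see the .keys()[0] / .values()[0] accesses in
# the surrounding snippets); the role names and payloads here are assumptions.
example_steps = [{'sender': 'GET / HTTP/1.1\r\n'},
                 {'receiver': 'HTTP/1.1 200 OK\r\n'}]
# Starting from [0, 0], nextMutation() bumps mutation_idx through the bytes of
# the current step's payload; when the payload is exhausted it advances
# step_idx and resets mutation_idx to 0. Once step_idx points at the last
# step, the bisection gives up and the connection is closed.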
def _current_step_data(self):
    step_idx, mutation_idx = self.factory.mutation
    log.debug("Mutating %s %s" % (step_idx, mutation_idx))
    mutated_step = daphn3Mutate(self.steps, step_idx, mutation_idx)
    log.debug("Mutated packet into %s" % mutated_step)
    return mutated_step[self.current_step].values()[0]
def startTor():
    def updates(prog, tag, summary):
        print("%d%%: %s" % (prog, summary))

    tempfile.tempdir = os.path.join(_repo_dir, 'tmp')
    if not os.path.isdir(tempfile.gettempdir()):
        os.makedirs(tempfile.gettempdir())
    _temp_dir = tempfile.mkdtemp()

    torconfig = TorConfig()
    torconfig.SocksPort = config.main.socks_port
    if config.main.tor2webmode:
        torconfig.Tor2webMode = 1
        torconfig.CircuitBuildTimeout = 60

    if config.main.tor_datadir is None:
        log.warn("Option 'tor_datadir' in oonib.conf is unspecified!")
        log.msg("Creating tmp directory in current directory for datadir.")
        log.debug("Using %s" % _temp_dir)
        datadir = _temp_dir
    else:
        datadir = config.main.tor_datadir
    torconfig.DataDirectory = datadir
    torconfig.save()

    if config.main.tor_binary is not None:
        d = launch_tor(torconfig, reactor,
                       tor_binary=config.main.tor_binary,
                       progress_updates=updates)
    else:
        d = launch_tor(torconfig, reactor, progress_updates=updates)

    d.addCallback(setupCollector, datadir)
    if ooniBouncer:
        d.addCallback(setupBouncer, datadir)
    d.addErrback(txSetupFailed)
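
# Illustrative only: the config.main options that startTor() reads, sketched as
# they might appear in oonib.conf. The exact file format and these values are
# assumptions, not taken from the source.
#
#   main:
#     socks_port: 9055
#     tor2webmode: false
#     tor_datadir: null        # when unset, a temporary directory is used
#     tor_binary: /usr/bin/tor # when unset, launch_tor picks the default tor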
def updateReport(self, report_id, parsed_request):
    log.debug("Got this request %s" % parsed_request)
    report_filename = os.path.join(self.report_dir, report_id)
    self.reports[report_id].refresh()
    try:
        with open(report_filename, 'a+') as fd:
            fd.write(parsed_request['content'])
    except IOError:
        raise e.OONIBError(404, "Report not found")
    self.write({'status': 'success'})
def updateReport(self, report_id, parsed_request):
    log.debug("Got this request %s" % parsed_request)
    report_filename = os.path.join(config.main.report_dir, report_id)
    config.reports[report_id].refresh()
    try:
        with open(report_filename, 'a+') as fd:
            fdesc.setNonBlocking(fd.fileno())
            fdesc.writeToFD(fd.fileno(), parsed_request['content'])
    except IOError:
        raise e.OONIBError(404, "Report not found")
    self.write({})
def updateReport(self, report_id, parsed_request):
    log.debug("Got this request %s" % parsed_request)
    report_filename = os.path.join(self.report_dir, report_id)
    try:
        self.reports[report_id].refresh()
    except KeyError:
        raise e.OONIBError(404, "Report not found")
    try:
        with open(report_filename, 'a+') as fd:
            fd.write(parsed_request['content'])
    except IOError:
        raise e.OONIBError(404, "Report not found")
    self.write({'status': 'success'})
def validateHeader(header):
    version_string = re.compile(r"^[0-9A-Za-z_\-\.]+$")
    name = re.compile(r"^[a-zA-Z0-9_\- ]+$")
    probe_asn = re.compile(r"^AS[0-9]+$")
    probe_cc = re.compile(r"^[A-Z]{2}$")
    test_helper = re.compile(r"^[A-Za-z0-9_\-]+$")

    expected_request = {
        'software_name': name,
        'software_version': version_string,
        'test_name': name,
        'test_version': version_string,
        'probe_asn': probe_asn,
        'probe_cc': probe_cc,
        'data_format_version': version_string
    }

    if not header.get('probe_asn'):
        header['probe_asn'] = 'AS0'
    if not header.get('probe_cc'):
        header['probe_cc'] = 'ZZ'
    if not header.get('test_start_time'):
        header['test_start_time'] = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
    if not header.get('data_format_version'):
        header['data_format_version'] = '0.1.0'

    for k, regexp in expected_request.items():
        try:
            value_to_check = header[k]
        except KeyError:
            raise e.MissingField(k)
        log.debug("Matching %s with %s | %s" % (regexp, value_to_check, k))
        if re.match(regexp, str(value_to_check)):
            continue
        raise e.InvalidRequestField(k)

    try:
        requested_test_helper = header['test_helper']
        if not re.match(test_helper, str(requested_test_helper)):
            raise e.InvalidRequestField('test_helper')
    except KeyError:
        pass

    return header
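
# Illustrative only: a minimal header that passes validateHeader() above; the
# field values are made up for the example.
example_header = {
    'software_name': 'ooniprobe',
    'software_version': '1.0.0',
    'test_name': 'http_requests',
    'test_version': '0.1.0',
    'probe_asn': 'AS12345',
    # 'probe_cc', 'test_start_time' and 'data_format_version' may be omitted:
    # validateHeader() fills in 'ZZ', the current UTC time and '0.1.0'.
}
# validateHeader(example_header)  # returns the header with defaults filled in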
def createTables():
    """
    XXX this is to be refactored and only exists for experimentation.
    """
    from oonib.db import models
    for model_name in models.__all__:
        try:
            model = getattr(models, model_name)
        except Exception as exc:
            log.err("Error in db initting")
            log.err(exc)
        try:
            log.debug("Creating %s" % model)
            yield tables.runCreateTable(model, transactor, database)
        except Exception as exc:
            log.debug(str(exc))
def put(self):
    """
    Update an already existing report.

      {
       'report_id': 'XXX',
       'content': 'XXX'
      }
    """
    parsed_request = parseUpdateReportRequest(self.request.body)
    report_id = parsed_request['report_id']

    log.debug("Got this request %s" % parsed_request)
    report_filename = os.path.join(config.main.report_dir, report_id)

    config.reports[report_id] = time.time()
    reactor.callLater(config.main.stale_time, stale_check, report_id)

    self.updateReport(report_filename, parsed_request['content'])
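
# Illustrative only: the body a probe would PUT to the handler above, shown as
# a dict before JSON encoding. The report_id value is made up; real report ids
# are issued by the collector when the report is created.
example_put_body = {
    'report_id': '2014-01-01T000000Z_AS0_' + 'X' * 50,
    'content': '---\ninput: example.org\n...\n',
}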
def nextStep(self):
    log.debug("Moving on to next step in the state walk")
    self.current_data_received = 0
    # Python why?
    if self.current_step >= (len(self.steps) - 1):
        log.msg("Reached the end of the state machine")
        log.msg("Censorship fingerprint bisected!")
        step_idx, mutation_idx = self.factory.mutation
        log.msg("step_idx: %s | mutation_idx: %s" % (step_idx, mutation_idx))
        # self.transport.loseConnection()
        if self.report:
            self.report["mutation_idx"] = mutation_idx
            self.report["step_idx"] = step_idx
        return
    else:
        self.current_step += 1

    if self._current_step_role() == self.role:
        # We need to send more data because we are again responsible for
        # doing so.
        self.sendPayload()
def dataReceived(self, data):
    current_step_role = self.steps[self.current_step].keys()[0]
    log.debug("Current step role %s" % current_step_role)
    if current_step_role == self.role:
        log.debug("Got a state error!")
        raise StepError("I should not have gotten data, while I did, \
                perhaps there is something wrong with the state machine?")

    self.current_data_received += len(data)
    expected_data_in_this_state = len(self.steps[self.current_step].values()[0])

    log.debug("Current data received %s" % self.current_data_received)
    if self.current_data_received >= expected_data_in_this_state:
        self.nextStep()
def post(self):
    """
    Creates a new report with the input

    * Request

      {'software_name': 'XXX',
       'software_version': 'XXX',
       'test_name': 'XXX',
       'test_version': 'XXX',
       'probe_asn': 'XXX',
       'content': 'XXX'
       }

      Optional:
        'test_helper': 'XXX'
        'client_ip': 'XXX'
        (not implemented, nor in client, nor in backend)

        The idea behind these two fields is that it would be interesting to
        also collect how the request was observed from the collectors point
        of view.

        We use as a unique key the client_ip address and a time window. We
        then need to tell the test_helper that is selected the client_ip
        address and tell it to expect a connection from a probe in that time
        window.

        Once the test_helper sees a connection from that client_ip it will
        store for the testing session the data that it receives.
        When the probe completes the report (or the time window is over) the
        final report will include also the data collected from the
        collectors view point.

    * Response

      {'backend_version': 'XXX', 'report_id': 'XXX'}

    """
    # Note: the request is being validated inside of parseNewReportRequest.
    report_data = parseNewReportRequest(self.request.body)

    log.debug("Parsed this data %s" % report_data)

    software_name = str(report_data['software_name'])
    software_version = str(report_data['software_version'])

    probe_asn = str(report_data['probe_asn'])
    probe_cc = str(report_data.get('probe_cc', 'ZZ'))

    self.testName = str(report_data['test_name'])
    self.testVersion = str(report_data['test_version'])

    if self.policy_file:
        try:
            self.inputHashes = report_data['input_hashes']
        except KeyError:
            raise e.InputHashNotProvided
        self.checkPolicy()

    if 'content' in report_data:
        content = yaml.safe_load(report_data['content'])
        report_header = validate_report_header(content)
    else:
        content = {
            'software_name': software_name,
            'software_version': software_version,
            'probe_asn': probe_asn,
            'probe_cc': probe_cc,
            'test_name': self.testName,
            'test_version': self.testVersion,
            # taken from the request rather than self.inputHashes, which is
            # only set when a policy file is configured
            'input_hashes': report_data.get('input_hashes', []),
            'start_time': time.time()
        }

    content['backend_version'] = config.backend_version
    report_header = yaml.dump(content)
    content = "---\n" + report_header + '...\n'

    if not probe_asn:
        probe_asn = "AS0"

    report_id = otime.timestamp() + '_' \
        + probe_asn + '_' \
        + randomStr(50)

    # The report filename contains the timestamp of the report plus a
    # random nonce
    report_filename = os.path.join(self.report_dir, report_id)

    response = {
        'backend_version': config.backend_version,
        'report_id': report_id
    }

    requested_helper = report_data.get('test_helper')
    if requested_helper:
        try:
            response['test_helper_address'] = self.helpers[
                requested_helper].address
        except KeyError:
            raise e.TestHelperNotFound

    self.reports[report_id] = Report(report_id,
                                     self.stale_time,
                                     self.report_dir,
                                     self.archive_dir,
                                     self.reports)

    self.writeToReport(report_filename, content)

    self.write(response)
def post(self):
    """
    Creates a new report with the input

    * Request

      {'software_name': 'XXX',
       'software_version': 'XXX',
       'test_name': 'XXX',
       'test_version': 'XXX',
       'probe_asn': 'XXX',
       'content': 'XXX'
       }

      Optional:
        'test_helper': 'XXX'
        'client_ip': 'XXX'
        (not implemented, nor in client, nor in backend)

        The idea behind these two fields is that it would be interesting to
        also collect how the request was observed from the collectors point
        of view.

        We use as a unique key the client_ip address and a time window. We
        then need to tell the test_helper that is selected the client_ip
        address and tell it to expect a connection from a probe in that time
        window.

        Once the test_helper sees a connection from that client_ip it will
        store for the testing session the data that it receives.
        When the probe completes the report (or the time window is over) the
        final report will include also the data collected from the
        collectors view point.

    * Response

      {'backend_version': 'XXX', 'report_id': 'XXX'}

    """
    # XXX here we should validate and sanitize the request
    try:
        report_data = parseNewReportRequest(self.request.body)
    except InvalidRequestField as exc:
        raise e.InvalidRequestField(exc)
    except MissingField as exc:
        raise e.MissingRequestField(exc)

    log.debug("Parsed this data %s" % report_data)

    software_name = str(report_data['software_name'])
    software_version = str(report_data['software_version'])
    probe_asn = str(report_data['probe_asn'])
    probe_cc = str(report_data['probe_cc'])

    self.testName = str(report_data['test_name'])
    self.testVersion = str(report_data['test_version'])

    if config.main.policy_file:
        try:
            self.inputHashes = report_data['input_hashes']
        except KeyError:
            raise e.InputHashNotProvided
        self.checkPolicy()

    if 'content' in report_data:
        content = yaml.safe_load(report_data['content'])
        try:
            report_header = validate_report_header(content)
        except MissingReportHeaderKey as key:
            raise e.MissingReportHeaderKey(key)
        except InvalidReportHeader as key:
            raise e.InvalidReportHeader(key)
def debug(self, msg):
    log.debug("Current step %s" % self.current_step)
    log.debug("Current data received %s" % self.current_data_received)
    log.debug("Current role %s" % self.role)
    log.debug("Current steps %s" % self.steps)
    log.debug("Current step data %s" % self._current_step_data())
        collectors view point.

    * Response

      {'backend_version': 'XXX', 'report_id': 'XXX'}

    """
    # XXX here we should validate and sanitize the request
    try:
        report_data = parseNewReportRequest(self.request.body)
    except InvalidRequestField as exc:
        raise e.InvalidRequestField(exc)
    except MissingField as exc:
        raise e.MissingRequestField(exc)

    log.debug("Parsed this data %s" % report_data)

    software_name = report_data['software_name']
    software_version = report_data['software_version']
    probe_asn = report_data['probe_asn']

    self.testName = report_data['test_name']
    self.testVersion = report_data['test_version']

    if config.main.policy_file:
        try:
            self.inputHashes = report_data['input_hashes']
        except KeyError:
            raise e.InputHashNotProvided
        self.checkPolicy()
def post(self):
    """
    Creates a new report with the input

    * Request

      {'software_name': 'XXX',
       'software_version': 'XXX',
       'test_name': 'XXX',
       'test_version': 'XXX',
       'probe_asn': 'XXX',
       'content': 'XXX'
       }

      Optional:
        'test_helper': 'XXX'
        'client_ip': 'XXX'
        (not implemented, nor in client, nor in backend)

        The idea behind these two fields is that it would be interesting to
        also collect how the request was observed from the collectors point
        of view.

        We use as a unique key the client_ip address and a time window. We
        then need to tell the test_helper that is selected the client_ip
        address and tell it to expect a connection from a probe in that time
        window.

        Once the test_helper sees a connection from that client_ip it will
        store for the testing session the data that it receives.
        When the probe completes the report (or the time window is over) the
        final report will include also the data collected from the
        collectors view point.

    * Response

      {
       'backend_version': 'XXX',
       'report_id': 'XXX',
       'supported_formats': ['yaml', 'json']
      }

    """
    # Note: the request is being validated inside of parseNewReportRequest.
    report_data = parseNewReportRequest(self.request.body)

    log.debug("Parsed this data %s" % report_data)

    self.testName = str(report_data['test_name'])
    self.testVersion = str(report_data['test_version'])

    if self.policy_file:
        try:
            self.inputHashes = report_data['input_hashes']
        except KeyError:
            raise e.InputHashNotProvided
        self.checkPolicy()

    data = None
    if report_data['format'] == 'yaml' and 'content' not in report_data:
        content = {
            'software_name': str(report_data['software_name']),
            'software_version': str(report_data['software_version']),
            'probe_asn': str(report_data['probe_asn']),
            'probe_cc': str(report_data['probe_cc']),
            'test_name': self.testName,
            'test_version': self.testVersion,
            'input_hashes': report_data.get('input_hashes', []),
            'test_start_time': str(report_data['test_start_time']),
            'data_format_version': str(report_data.get('data_format_version',
                                                        '0.1.0'))
        }
        data = "---\n" + yaml.dump(content) + "...\n"
    elif report_data['format'] == 'yaml' and 'content' in report_data:
        header = yaml.safe_load(report_data['content'])
        data = "---\n" + yaml.dump(validateHeader(header)) + "...\n"

    report_id = otime.timestamp() + '_' \
        + report_data.get('probe_asn', 'AS0') + '_' \
        + randomStr(50)

    # The report filename contains the timestamp of the report plus a
    # random nonce
    report_path = self.report_dir.child(report_id)
    # We use this file to store the metadata associated with the report
    # submission.
    report_metadata_path = self.report_dir.child(report_id + METADATA_EXT)

    response = {
        'backend_version': config.backend_version,
        'report_id': report_id,
        'supported_formats': ['yaml', 'json']
    }

    requested_helper = report_data.get('test_helper')
    if requested_helper:
        try:
            response['test_helper_address'] = self.helpers[
                requested_helper].address
        except KeyError:
            raise e.TestHelperNotFound

    with report_metadata_path.open('w') as f:
        f.write(json_dumps(report_data))
        f.write("\n")

    report_path.touch()
    if data is not None:
        with report_path.open('w') as f:
            f.write(data)

    self.write(response)
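
# Illustrative only: the shape of the response written by post() above. The
# report_id is built from otime.timestamp(), the probe ASN and a 50-character
# random nonce; every value below is made up, and 'test_helper_address' is
# only present when a known 'test_helper' was requested.
example_new_report_response = {
    'backend_version': 'XXX',
    'report_id': '2014-01-01T000000Z_AS0_' + 'X' * 50,
    'supported_formats': ['yaml', 'json'],
}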