def test_audit_errors_with_no_files(env_setup, capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = [""]
        audit()
    out, err = capsys.readouterr()
    assert "No files given to process." in err
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 1]
def dispatch(e, args):
    e.election_dirname = ids.filename_safe(args.election_dirname)
    e.election_name = args.election_name
    OpenAuditTool.ELECTIONS_ROOT = args.elections_root
    if args.set_audit_seed is not None:
        audit.set_audit_seed(e, args.set_audit_seed)
    if args.read_election_spec:
        logger.info("read_election_spec")
        election_spec.read_election_spec(e)
    elif args.read_reported:
        logger.info("read_reported")
        election_spec.read_election_spec(e)
        reported.read_reported(e)
    elif args.make_audit_orders:
        logger.info("make_audit_orders")
        audit_orders.compute_audit_orders(e)
    elif args.read_audited:
        logger.info("read_audited--NO-OP-TBD")
    elif args.audit:
        election_spec.read_election_spec(e)
        reported.read_reported(e)
        audit.audit(e, args)
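# A minimal sketch of the command-line interface that dispatch() above appears
# to consume: each flag mirrors an attribute read from args. Defaults, types,
# and the description are assumptions, not the project's actual parser.
import argparse

def make_parser():
    parser = argparse.ArgumentParser(description="post-election audit driver (sketch)")
    parser.add_argument("--election_dirname", default="")
    parser.add_argument("--election_name", default="")
    parser.add_argument("--elections_root", default="./elections")
    parser.add_argument("--set_audit_seed", type=int, default=None)
    parser.add_argument("--read_election_spec", action="store_true")
    parser.add_argument("--read_reported", action="store_true")
    parser.add_argument("--make_audit_orders", action="store_true")
    parser.add_argument("--read_audited", action="store_true")
    parser.add_argument("--audit", action="store_true")
    return parser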
def process_args(e, args):
    e.election_dirname = ids.filename_safe(args.election_dirname)
    e.election_name = args.election_name
    # Assign the module-level setting; a bare local assignment here would be a no-op.
    OpenAuditTool.ELECTIONS_ROOT = args.elections_root
    if args.set_audit_seed is not None:
        audit.set_audit_seed(e, args.set_audit_seed)
    if args.read_election_spec:
        print("read_election_spec")
        election_spec.read_election_spec(e)
    elif args.read_reported:
        print("read_reported")
        election_spec.read_election_spec(e)
        reported.read_reported(e)
    elif args.make_audit_orders:
        print("make_audit_orders")
        audit_orders.compute_audit_orders(e)
    elif args.read_audited:
        print("read_audited--NO-OP-TBD")
    elif args.audit:
        election_spec.read_election_spec(e)
        reported.read_reported(e)
        audit.audit(e, args)
def test_audit_passes_received_events(env_setup, td_tmpdir, capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = ["", os.path.join(td_tmpdir, "found_events.log")]
        audit()
    out, err = capsys.readouterr()
    assert "found_events.log: pass" in err
    assert os.path.isfile(os.path.join(td_tmpdir, "found_events.log.audited"))
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
def test_audit_handles_empty_log(env_setup, td_tmpdir, capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = ["", os.path.join(td_tmpdir, "empty.log")]
        audit()
    out, err = capsys.readouterr()
    assert "empty.log: pass" in err
    assert os.path.isfile(os.path.join(td_tmpdir, "empty.log.audited"))
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
def test_audit_respects_dry_run(env_setup, td_tmpdir, capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = ["", os.path.join(td_tmpdir, "found_events.log"), "--dry-run"]
        audit()
    out, err = capsys.readouterr()
    assert "found_events.log: pass" in err
    # A dry run must not create the .audited output file
    assert not os.path.isfile(os.path.join(td_tmpdir, "found_events.log.audited"))
    assert os.path.isfile(os.path.join(td_tmpdir, "found_events.log"))
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
def test_audit_handles_invalid_json(env_setup, td_tmpdir, capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = ["", os.path.join(td_tmpdir, "events_with_invalid_json.log")]
        audit()
    out, err = capsys.readouterr()
    assert "fail" in err
    assert not os.path.isfile(
        os.path.join(td_tmpdir, "events_with_invalid_json.log.audited")
    )
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
def test_audit_will_not_overwrite(env_setup, td_tmpdir, capsys):
    shutil.copy(
        os.path.join(td_tmpdir, "found_events.log"),
        os.path.join(td_tmpdir, "found_events.log.audited"),
    )
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = ["", os.path.join(td_tmpdir, "found_events.log")]
        audit()
    out, err = capsys.readouterr()
    assert "found_events.log: pass" not in err
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
def test_audit_handles_success_and_failure(env_setup, td_tmpdir, capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = [
            "",
            os.path.join(td_tmpdir, "found_events.log"),
            os.path.join(td_tmpdir, "missing_events.log"),
        ]
        audit()
    out, err = capsys.readouterr()
    assert "pass" in err
    assert "fail" in err
    assert os.path.isfile(os.path.join(td_tmpdir, "found_events.log.audited"))
    assert not os.path.isfile(os.path.join(td_tmpdir, "missing_events.log.audited"))
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
def test_audit_fails_missing_events(env_setup, td_tmpdir, capsys):
    with pytest.raises(SystemExit) as pytest_e:
        sys.argv = ["", os.path.join(td_tmpdir, "missing_events.log")]
        audit()
    out, err = capsys.readouterr()
    # Good data in the test_audit database
    assert "a1baa562" not in err
    assert "b834194c" not in err
    assert "1542f8fd" not in err
    assert "b93cec5b" not in err
    # Bad data not in the test_audit database
    assert "does-not-exist-1" in err
    assert "does-not-exist-2" in err
    assert "missing_events.log: fail" in err
    assert not os.path.isfile(os.path.join(td_tmpdir, "missing_events.log.audited"))
    assert [pytest_e.type, pytest_e.value.code] == [SystemExit, 0]
def main():
    # Help text and argument parsing
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''\
This script takes a Cisco ACL definition and parses it into a searchable spreadsheet
Automated audit rules have been included for convenience, but are disabled by default.
This is to improve execution time.
======================================================================================''')
    parser.add_argument('-o', '--out', nargs=1, default=['ACL_Parsed.xlsx'],
                        help='Overwrite the name of the output file')
    parser.add_argument('-a', '--all', action='store_true',
                        help='Perform all audits')
    parser.add_argument('-r', '--redundant', action='store_true',
                        help='Perform the redundant rules audit')
    parser.add_argument('-s', '--shadow', action='store_true',
                        help='Perform the shadowed rules audit -- NOT IMPLEMENTED')
    parser.add_argument('-x', '--promiscuous', action='store_true',
                        help='Perform the promiscuous rules audit')
    parser.add_argument('infile', nargs='+', type=argparse.FileType('r'),
                        help='Path to the ACL definition file (.txt format)')
    args = parser.parse_args()
    outfile = os.path.join(os.getcwd(), args.out[0])

    entries_table = []
    errors_table = []
    for acl in args.infile:
        entries, errors = parse.parse(acl)
        entries_table.append(entries[:])
        errors_table.append(errors[:])

    if args.all:
        audit_type = [True, True, True]
    else:
        audit_type = [args.promiscuous, args.redundant, args.shadow]

    audit_table = audit.audit(entries_table, audit_type)
    utils.output_xlsx_file(entries_table, errors_table, audit_type, audit_table, outfile)
def get_classes():
    response = request.form["response"].encode('utf-8')
    print(response)
    print(len(response))
    remaining = audit(response)
    print('****** GOT REMAINING **********')
    b = request.form["cat"].encode('utf-8')
    c = request.form["label"].encode('utf-8')
    everything = fetchcourses.fetchAll(b, c)
    matches = everything[0]
    randLst.append(everything[0])
    print('__________ FILTER LENGTH___________')
    filters = everything[1]
    print(len(filters))
    return render_template('audit_results.html',
                           response=remaining, match=matches, filter=filters)
def load_licenses(path="./licenses", output="licenses.json"):
    if os.path.exists(output):
        if os.stat(output).st_mtime < os.stat(__file__).st_mtime:
            os.unlink(output)
        else:
            print("Output file already exists, doing nothing")
            sys.exit(0)
    licenses = stream_licenses(path=path)
    data = list(sorted(validate(merge_stream(licenses)), key=lambda x: x['id']))
    with open(output, 'w') as fd:
        json.dump(data, fd)
    print("{len} records written out".format(len=len(data)))
    # Now, let's audit it
    report = audit.audit(path=output, exit=False)
    fatal = False
    for key, values in report.items():
        for value in values:
            if value['fatal']:
                print("FATAL:", value['id'], value['message'], value)
                fatal = True
    if fatal:
        raise Exception("Fatal error found")
    for identifier in report['identifiers']:
        print("  {count:03d} licenses contain scheme {scheme} ({percent:.1f}%)".format(
            **identifier))
    for tag in report['tags']:
        print("  {count:03d} licenses contain tag {tag} ({percent:.1f}%)".format(
            **tag))
def fix_street(osmfile):
    st_types = audit.audit(osmfile)
    for st_type, ways in st_types.items():
        for name in ways:
            if st_type in street_type_mapping:
                better_name = name.replace(st_type, street_type_mapping[st_type])
                print(name, "=>", better_name)
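# A minimal sketch of the street_type_mapping dict that fix_street() above
# relies on. The keys and values here are illustrative assumptions, not the
# project's actual mapping.
street_type_mapping = {
    "St": "Street",
    "St.": "Street",
    "Ave": "Avenue",
    "Rd.": "Road",
}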
def improve_street_name():
    st_types = audit.audit(richardson_sample)[0]
    pprint.pprint(dict(st_types))
    for st_type, ways in st_types.items():
        for name in ways:
            better_name = update_name(name, mapping)
            print(name, "=>", better_name)
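# A hypothetical update_name() helper as assumed by improve_street_name()
# above: it swaps a street-type suffix using the mapping. The project's real
# helper may differ.
def update_name(name, mapping):
    parts = name.split()
    if parts and parts[-1] in mapping:
        parts[-1] = mapping[parts[-1]]
    return " ".join(parts)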
def load_licenses(path="./licenses", output="licenses.json"):
    licenses = stream_licenses(path=path)
    data = list(sorted(validate(merge_stream(licenses)), key=lambda x: x['id']))
    with open(output, 'w') as fd:
        json.dump(data, fd, sort_keys=True)
    print("{len} records written out".format(len=len(data)))
    report = audit.audit(path=output)
    audit.display_report(report=report)
def main():
    filename = 'san-francisco.osm'
    # filename = 'sf-sampler.osm'

    # print("\n*** Check the number of tags ***")
    # taglist = mapparser.count_tags(filename)
    # pprint.pprint(taglist)

    # print("\n*** Check k value of each tag ***")
    # keys = tags.process_map(filename)
    # pprint.pprint(keys)

    print("\n*** Audit street types and city names ***")
    street_types = defaultdict(set)
    city_names = set()
    audit.audit(filename, street_types, city_names)
    pprint.pprint(dict(street_types))
    pprint.pprint(city_names)

    print("\n*** Convert data ***")
    data.process_map(filename, False)
def improve_street_name():
    st_types = audit.audit(OSM_FILE)
    for st_type, ways in st_types.items():
        for name in ways:
            better_name = update_name(name, mapping)
            print(name, "=>", better_name)
            # Second check: replace bad street names with corrected ones
            if "Streetewart" in better_name:
                better_name = better_name.replace(" Streetewart", " Stewart")
                print(name, "=>", better_name)
def process_map(file_in, pretty=False):
    # You do not need to change this file
    audit.audit(file_in)
    file_out = "{0}.json".format(file_in)
    data = []
    with open(file_out, "w") as fo:
        for _, element in ET.iterparse(file_in):
            el = shape_element(element)
            if el:
                data.append(el)
                if pretty:
                    fo.write(json.dumps(el, indent=2) + "\n")
                else:
                    fo.write(json.dumps(el) + "\n")
    return data
def dispatch(e, args):
    e.election_dirname = ids.filename_safe(args.election_dirname)
    e.election_name = args.election_name
    e.num_winners = int(args.num_winners)
    e.max_num_it = int(args.max_num_it)
    e.sample_by_size = args.sample_by_size
    e.use_discrete_rm = args.use_discrete_rm
    e.pick_county_func = args.pick_county_func
    OpenAuditTool.ELECTIONS_ROOT = args.elections_root
    if args.set_audit_seed is not None:
        audit.set_audit_seed(e, args.set_audit_seed)
    if args.read_election_spec:
        logger.info("read_election_spec")
        election_spec.read_election_spec(e)
    elif args.read_reported:
        logger.info("read_reported")
        election_spec.read_election_spec(e)
        reported.read_reported(e)
    elif args.make_audit_orders:
        logger.info("make_audit_orders")
        audit_orders.compute_audit_orders(e)
    elif args.read_audited:
        logger.info("read_audited--NO-OP-TBD")
    elif args.audit:
        election_spec.read_election_spec(e)
        reported.read_reported(e)
        audit.audit(e, args)
def street_name_check():
    '''
    Checks the street name or suffix of each address and writes the output
    list to a file for manual review of common street names
    '''
    street_types = audit.audit(file_location)
    print("Finished getting street names")
    print("Writing file")
    with open("suffix.txt", 'w', errors='replace') as output:
        for suffix in sorted(street_types.keys()):
            output.write(suffix + "\n")
def make_claslisting():
    try:
        # Load the data
        data = request.get_json()
    except Exception as e:
        raise e
    if data == {}:
        return bad_request()
    try:
        user_id = data['user_id']
    except KeyError:
        return jsonify(message="Error in classifiers listing: user_id is missing")
    try:
        case_id = data['case_id']
    except KeyError:
        return jsonify(message="Error in classifiers listing: case_id is missing")
    available_classifiers = claslisting(user_id, case_id)
    data_audit = {
        "auditEventType": "Start task",
        "details": {
            "claslisting": "Lists the available classifiers"
        },
        "principal": "Analyst"
    }
    datajson = json.dumps(data_audit)
    results_audit = audit(datajson)
    # Send the response codes
    responses = jsonify(available_classifiers=available_classifiers)
    responses.status_code = 200
    return responses
def main():
    # Report the original file size
    print("Original file size:", get_file_size(INPUT_FILE), "MB")
    # Make a sample file
    # make_file(INPUT_FILE, TEST_FILE)
    # Report the sample file size
    print("Sample file size:", get_file_size(TEST_FILE), "MB")
    # Audit addresses
    st_types, st_types_count = audit(TEST_FILE)
    # Audit city names
    city_types, city_types_count = audit_city(TEST_FILE)
    # Show data with errors
    print("** Audit Street **")
    pprint.pprint(dict(st_types))
    pprint.pprint(dict(st_types_count))
    print("** Audit Cities **")
    pprint.pprint(dict(city_types))
    pprint.pprint(dict(city_types_count))
    # Make JSON
    process_map(TEST_FILE, True)
def load_licenses(path="./licenses", output="licenses.json"):
    if os.path.exists(output):
        if os.stat(output).st_mtime < os.stat(__file__).st_mtime:
            os.unlink(output)
        else:
            print("Output file already exists, doing nothing")
            sys.exit(0)
    licenses = stream_licenses(path=path)
    data = list(sorted(validate(merge_stream(licenses)), key=lambda x: x['id']))
    with open(output, 'w') as fd:
        json.dump(data, fd)
    print("{len} records written out".format(len=len(data)))
    # Now, let's audit it
    report = audit.audit(path=output, exit=False)
    fatal = False
    for key, values in report.items():
        for value in values:
            if value['fatal']:
                print("FATAL:", value['id'], value['message'], value)
                fatal = True
    if fatal:
        raise Exception("Fatal error found")
    for identifier in report['identifiers']:
        print("  {count:03d} licenses contain scheme {scheme} ({percent:.1f}%)".format(
            **identifier
        ))
    for tag in report['keywords']:
        print("  {count:03d} licenses contain tag {tag} ({percent:.1f}%)".format(
            **tag
        ))
def make_classifier():
    try:
        # Load the data
        data = request.get_json()
    except Exception as e:
        raise e
    if data == {}:
        return bad_request("There is no data for the training")
    # Get the text and the language
    try:
        lang = data['lang']
    except KeyError:
        try:
            lang = detect_language(data['text'])
            print(lang)
        except Exception:
            return jsonify("Error in vectorize: language field is missing")
    try:
        annotated_data = data['annotated_data']
    except KeyError:
        return jsonify("Error in classifier: annotated data is missing")
    try:
        user_id = data['user_id']
    except KeyError:
        return jsonify("Error in classifier: user_id is missing")
    try:
        case_id = data['case_id']
    except KeyError:
        return jsonify("Error in classifier: case_id is missing")
    try:
        clas_name = data['clas_name']
    except KeyError:
        return jsonify("Error in classifier: classifier name is missing")
    print(len(annotated_data))
    if len(annotated_data) < 22:
        return jsonify(
            "Training data set should have more than 10 samples per each class")
    if lang not in ['en', 'es', 'ar', 'ro', 'fr']:
        return jsonify(
            "Language not available. Language must be in ['en','es','ar','ro','fr']")
    # Train the new classifier
    print("Training a new classifier from the user's annotated dataset")
    accuracy = classifier(annotated_data, lang, user_id, case_id, clas_name)
    data_audit = {
        "auditEventType": "Start task",
        "details": {
            "classifier": "Trains a new classifier based on the annotations "
                          "provided by the user"
        },
        "principal": "Analyst"
    }
    datajson = json.dumps(data_audit)
    results_audit = audit(datajson)
    # Send the response codes
    responses = jsonify(
        message="Classifier has been saved. Accuracy given in % - calculated using C-10V",
        accuracy=accuracy)
    responses.status_code = 200
    return responses
def make_sento():
    try:
        # Load the data
        data = request.get_json()
    except Exception as e:
        raise e
    if data == {}:
        return bad_request()
    # Get the text, language and classifier
    try:
        lang = data['lang']
    except KeyError:
        try:
            lang = detect_language(data['text'])
            print(lang)
        except Exception:
            return jsonify("Error in vectorize: language field is missing")
    try:
        text = data['text']
    except KeyError:
        return jsonify("Error in sento: text is missing")
    try:
        cls = data['classifier']
    except KeyError:
        return jsonify("Error in sento: classifier is missing")
    if lang not in ['en', 'es', 'ar', 'ro', 'fr']:
        return jsonify(
            "Language not available. Language must be in ['en','es','ar','ro','fr']")
    # Preprocess the text
    print("Sento analysis")
    # Probability
    probability = probability_terror(text, lang, cls)
    print(probability)
    # Analyze
    filename = os.path.join(os.path.dirname(__file__), 'models-registry.json')
    registry = load_data(filename)
    analysis = analyze(text, lang, registry)
    data_audit = {
        "auditEventType": "Start task",
        "details": {
            "sento": "NLP analysis"
        },
        "principal": "Analyst"
    }
    datajson = json.dumps(data_audit)
    results_audit = audit(datajson)
    # Send the response codes
    responses = jsonify(probability=probability,
                        concepts=analysis[0],
                        key_ideas=analysis[1],
                        topics=analysis[2])
    responses.status_code = 200
    return responses
way_tag_surface_values = explore.get_tag_key_values(file, ['way'], 'surface')

# print out top 20 entries
print("Most frequently occurring tag keys:")
pprint.pprint(tag_keys[:20])
print()
print("Most frequently occurring way tag surface values:")
pprint.pprint(way_tag_surface_values[:20])

## data auditing
bad_house_numbers = []
bad_cities = []
bad_phone_numbers = []
bad_websites = []
audit.audit(file, bad_house_numbers, bad_cities, bad_phone_numbers, bad_websites)

print("Audited house numbers:")
pprint.pprint(bad_house_numbers[:20])
print()
print("Audited cities:")
pprint.pprint(bad_cities[:20])
print()
print("Audited phone numbers:")
pprint.pprint(bad_phone_numbers[:20])
print()
print("Audited websites:")
pprint.pprint(bad_websites[:20])
print()

## SQL database
from audit import audit

LOG_FILE = 'decisions.log'

def manual_decision(scenario):
    print()
    print(40 * '-')
    print()
    print(scenario)
    print()
    response = ' '
    while response.lower() not in ['a', 'b']:
        response = input(
            "Enter 'a' to save the passengers, or 'b' to save the pedestrians: ")
    if response.lower() == 'a':
        return 'passengers'
    else:
        return 'pedestrians'

if __name__ == '__main__':
    audit(manual_decision, 60, seed=8675309)
def shape_element(element, node_attr_fields=NODE_FIELDS, way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS, default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict"""
    node_attribs = {}
    way_attribs = {}
    way_nodes = []
    tags = []  # Handle secondary tags the same way for both node and way elements

    # First call the audit function to update the XML data
    element = audit(element)
    if element.tag == 'node':
        for key in node_attr_fields:
            node_attribs[key] = element.attrib[key]
        for tag in element.iter('tag'):
            tag_temp = {}
            tag_key = tag.attrib['k']
            tag_temp['id'] = element.attrib['id']
            tag_temp['key'] = tag_key
            tag_temp['value'] = tag.attrib['v']
            if problem_chars.search(tag_key) is not None:
                continue
            elif LOWER_COLON.search(tag_key) is not None:
                # Split 'addr:street'-style keys into type and key
                type_no = tag_key.index(':')
                tag_temp['type'] = tag_key[:type_no]
                tag_temp['key'] = tag_key[type_no + 1:]
            else:
                tag_temp['type'] = default_tag_type
            tags.append(tag_temp)
        return {'node': node_attribs, 'node_tags': tags}
    elif element.tag == 'way':
        for way_key in way_attr_fields:
            way_attribs[way_key] = element.attrib[way_key]
        for way_tag in element.iter('tag'):
            tag_temp = {}
            tag_key = way_tag.attrib['k']
            tag_temp['id'] = element.attrib['id']
            tag_temp['key'] = tag_key
            tag_temp['value'] = way_tag.attrib['v']
            if LOWER_COLON.search(tag_key) is not None:
                type_no = tag_key.index(':')
                tag_temp['type'] = tag_key[:type_no]
                tag_temp['key'] = tag_key[type_no + 1:]
            elif problem_chars.search(tag_key) is not None:
                continue
            else:
                tag_temp['type'] = default_tag_type
            tags.append(tag_temp)
        way_node_no = 0
        for way_node in element.iter('nd'):
            way_nodes.append({
                'id': element.attrib['id'],
                'node_id': way_node.attrib['ref'],
                'position': way_node_no,
            })
            way_node_no += 1
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}
def improve_phone():
    p_types = audit.audit(richardson_sample)[1]
    for p in p_types:
        better_p = update_num(p)
        print(p, "=>", better_p)
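# A hypothetical update_num() helper as assumed by improve_phone() above:
# it normalizes a US-style phone number to +1-XXX-XXX-XXXX. The project's
# real helper may behave differently.
import re

def update_num(num):
    digits = re.sub(r"\D", "", num)  # keep digits only
    if len(digits) == 11 and digits.startswith("1"):
        digits = digits[1:]
    if len(digits) == 10:
        return "+1-{}-{}-{}".format(digits[:3], digits[3:6], digits[6:])
    return num  # leave unrecognized formats unchanged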
# count how many tags we have
print("\nTag count:")
pprint(mapparser.count_tags(FILENAME))

# get an idea of what kind of fixes we should make
print("\nTags issues:")
pprint(tags.process_map(FILENAME))

# get an idea of unique users in the dataset
print("\nUnique User Count:\n")
print(users.process_map(FILENAME))

# audit the data to see what changes still need to be made
print("\nIdeas for audits that should be made:")
pprint(audit.audit(FILENAME))

# lastly, after examining the data, call data.py
print("\nExporting data to csv files:")
data.process_map(FILENAME, validate=False)

# If you want to load the CSV content in a terminal instead, comment out the
# code below; it loads all data into SQL from Python.
# 0) Make sure the DB has been created by establishing a connection first:
insert_data.create_connection(DB_FILE)
# 1) Split the drop query on ';' and execute each of those queries against the DB
for drop in DROP_QUERY.split(";"):
    insert_data.update_db(drop, DB_FILE)
from audit import audit

def automatic_decision(scenario):
    # *** YOUR CODE GOES HERE ***
    # default to saving the passengers
    return "passengers"

if __name__ == '__main__':
    audit(automatic_decision, 60, seed=8675309)
# Fragment from the body of print_library(): the branches below run inside its
# per-value loop, and the opening street branch is implied by the elif chain.
        if dictionary == street:
            for i in range(len(query_types)):
                better_value = update_name(better_value, re_library[i],
                                           query_types[i], mappings[i])
        elif dictionary == postcode:
            better_value = update_postcode(better_value, re_library[-3])
        elif dictionary == city:
            better_value = update_city_name(better_value, re_library[-2])
        elif dictionary == cuisine:
            better_value = update_cuisine(better_value, re_library[-1])

        # Only print single-value tags
        single_tag = all(char not in value for char in [';', ',', ':'])
        # Also avoid printing generic tags, like 'meat' or 'steak'
        if (value != better_value) and single_tag \
                and (value not in ['meat', 'steak', 'steak_house']):
            print(value, '->', better_value)

# Comment out the remaining lines of code to suppress output
filename = 'milan_italy_sample.osm'
street, postcode, city, cuisine = audit.audit(filename, re_library)

print('\nSTREET FEATURES:')
print_library(street, re_library, query_types, mappings)
print('\nPOSTCODE FEATURES:')
print_library(postcode, re_library)
print('\nCITY FEATURES:')
print_library(city, re_library)
print("\nCUISINE FEATURES:")
print("NOTE: Multiple tags and generic words cleaned in 'data.py'.")
print_library(cuisine, re_library)
def start():
    log.log("Starting audit...", 0)
    audit.audit()
    log.log("Starting backup...", 0)
    backup.backup()
def test_audit(self):
    self.assertEqual(audit.audit('gold'), ['test2', 'test1'])