def index_products():
    '''View to index the products.json file.'''
    create_index()
    return jsonify({
        'message': 'Indexing complete!',
    })

def test():
    assert s.readfile("text_example_1.txt")
    assert s.readfile("")
    assert s.readfile("text_example_2.txt")
    assert s.get_words("gun fournaise mauvaise, situation !")
    assert s.get_words("")
    assert s.create_index("text_example_1.txt")
    assert s.create_index("text_example_2.txt")
    assert s.create_index("")
    assert s.get_lines("situation", "text_example_1.txt")
    assert s.get_lines("wallon", "text_example_2.txt")
    assert s.get_lines("", "")

def handle(self, *args, **options):
    index_name = args[0]
    alias = args[1] if len(args) > 1 else None
    self.stdout.write('Started creating index for {} with alias: {}'.format(index_name, alias))
    from search import create_index
    create_index(
        index_name=index_name,
        aliases=alias
    )
    self.stdout.write('Finished creating index for {} with alias: {}'.format(index_name, alias))

def handle_noargs(self, **options):
    from meps import models
    from mps import models
    from reps import models
    from search import create_index
    from search import Searchables
    from search import update_index
    import shutil
    if os.path.isdir(settings.WHOOSH_INDEX):
        shutil.rmtree(settings.WHOOSH_INDEX)
    create_index()
    for klass in Searchables.items:
        print 'indexing %s' % klass.__name__
        for i in klass.objects.all():
            update_index(None, i, created=False)

def rebuild_index(self, dbdir):
    """Convert and index data files for random access.

    Index movie list for searching.
    """
    # Import and index data files
    if os.path.exists(self.dbfile):
        raise Exception('%s exists' % self.dbfile)
    for parsername, parser in parsers.parsers():
        obj = parser(dbfile=self.dbfile, dbdir=dbdir, debug=self.debug)
        if self.debug:
            print "Indexing %s..." % parsername
        with Timer(indent=2, quiet=not self.debug):
            obj.rebuild_index(do_copy=True)
    # Create index of movie titles
    if self.debug:
        print "Creating search index..."
    with Timer(indent=2, quiet=not self.debug):
        search.create_index(self.dbfile, dbdir, debug=self.debug)

def setup():
    print('creating index')
    index = search.create_index('Server/calvin_transcript.txt', 5)
    print('finished creating index, now registering api resources')
    api.add_resource(ComicResource, '/comic/<string:date>')  # date format: YYYY-MM-DD
    api.add_resource(SearchResource, '/search/<string:query>',
                     resource_class_args=(index,))

def main():
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
        'Pragma': 'no-cache'
    }
    url = 'https://www.allrecipes.com/recipes/96/salad/'
    links = sd.parse_website(url, headers)
    if links:
        if len(links) > 0:
            es = se.connect_elasticsearch()
            for link in links:
                sleep(2)
                result = sd.parse(link['href'], headers)
                if es is not None:
                    if se.create_index(es, 'recipes'):
                        out = se.store_record(es, 'recipes', result)
                        print('Data indexed successfully')

def reset_index():
    search.delete_index()
    search.create_index()

    return postings


def write_database(postings):
    conn = sqlite3.connect('givegetgreen_db')
    c = conn.cursor()
    for post in postings:
        # name email phone address category description title
        # Use a parameterized query so values containing quotes are escaped safely.
        c.execute('''INSERT INTO posting_posting (
                         id, name, email, phone, address, category, description, title
                     ) VALUES (null, ?, ?, ?, ?, ?, ?, ?)''',
                  (post['name'], post['email'], post['phone'], post['address'],
                   post['category'], post['description'], post['title']))
    conn.commit()
    conn.close()
    print str(len(postings)) + " postings written to database"


if __name__ == '__main__':
    postings = generate_postings(NUM_GENERATE)
    write_database(postings)
    search.create_index("indexdir", "givegetgreen_db")

if __name__ == '__main__':
    index = None      # the inverted index dictionary
    num_doc = None    # number of documents
    num_uniq = None
    if fileIO.index_file_exists():
        # read prebuilt index from final.txt
        print('Reading index from file...')
        index = fileIO.read_index_from_file()
        num_doc = fileIO.read_num_doc_from_file()
        num_uniq = len(index)
    else:
        # create a new index structure from scratch (takes approx. 20 minutes)
        print('Creating index...')
        index, num_doc = search.create_index()
        num_uniq = len(index)

    # calculate tf-idf values for each term in the completed index (and store them in each subdictionary)
    # ONLY CALCULATE IDF VALUES RIGHT AFTER INDEX CREATION; reading from final.json may already have idf values
    for k, v in index.items():  # k: term  v: term's subdictionary
        idf = 0
        if type(index[k]) == float:
            print(index[k])
        if len(index[k]) != 0:  # avoid divide by 0 error
            idf += math.log(num_doc / len(index[k]))
        # print('Calculated idf as:', idf)
        # store idf value as first key in term's subdictionary, under the key 'idf'
        index[k]['idf'] = str(idf)
    fileIO.write_index_to_file(index)

from search import remove_index, create_index
from init_app import app

with app.app_context():
    remove_index("bill")
    create_index()

def main(args):
    """Parses arguments and runs the correct command."""
    parser = argparse.ArgumentParser(
        description="""CYBOK: The Cyber Security Body of Knowledge for Model-Based Vulnerability Assessment""")
    parser.add_argument("-s", "--search",
                        help="search attack vectors from database")
    parser.add_argument("-r", "--rank",
                        help="ranks results based on how many times they match",
                        action="store_true")
    parser.add_argument("-a", "--abstract",
                        help="abstracts from CVE to CWE and CAPEC",
                        action="store_true")
    parser.add_argument("-i", "--input",
                        help="inputs system model from graphml file")
    parser.add_argument("-u", "--update",
                        help="updates CAPEC, CWE, CVE data files",
                        action="store_true")
    parser.add_argument("-v", "--visualize",
                        help="plots the graph of attack vectors",
                        action="store_true")
    parser.add_argument("-t", "--target",
                        help="inputs target to find exploit chains based on evidence")
    parser.add_argument("-o", "--output",
                        help="outputs the results of -s or -i to csv and graphml files")
    args = parser.parse_args(args)

    if args.search:
        matched_attack_vectors = search(args.search)
        ranked_attack_vectors = rank_results(matched_attack_vectors)
        if args.output:
            with open(args.output + '_' + args.search + ".csv", "w") as results_file:
                writer = csv.writer(results_file)
                writer.writerow(
                    ('Hits', 'Attack Vector', 'Database', 'Related CWE',
                     'Related CAPEC', 'Related CVE', 'Contents'))
                for ranked_attack_vector in ranked_attack_vectors:
                    writer.writerow(
                        (ranked_attack_vector[1],
                         ranked_attack_vector[0].name,
                         ranked_attack_vector[0].db_name,
                         ranked_attack_vector[0].related_weakness,
                         ranked_attack_vector[0].related_attack_pattern,
                         ranked_attack_vector[0].related_vulnerability,
                         ranked_attack_vector[0].contents))
        else:
            for ranked_attack_vector in ranked_attack_vectors:
                pprint_attack_vector(ranked_attack_vector[0],
                                     str(ranked_attack_vector[1]))

    if args.input:
        sigma = system_graph(args.input)
        violated_components = find_violated_components(sigma)
        if args.output:
            nx.write_graphml(sigma, args.output + "_system_topology.graphml")
            av_graph = attack_vector_graph(violated_components)
            nx.write_graphml(av_graph, args.output + "_attack_vector_graph.graphml")
            with open(args.output + '_' + "full_analysis.csv", "w") as results_file:
                writer = csv.writer(results_file)
                writer.writerow(
                    ('Violated Component', 'Hits for Component', 'Attack Vector',
                     'Database', 'ID', 'Related CWE', 'Related CAPEC',
                     'Related CVE', 'Contents'))
                for violated_component in violated_components:
                    violated_component[1] = rank_results(violated_component[1])
                    for piece in violated_component[1]:
                        try:
                            writer.writerow(
                                (violated_component[0], piece[1], piece[0].name,
                                 piece[0].db_name, piece[0].db_id,
                                 piece[0].related_weakness,
                                 piece[0].related_attack_pattern,
                                 piece[0].related_vulnerability,
                                 piece[0].contents))
                        except:
                            print("Error [1]")
            as_sigma = copy.deepcopy(sigma)
            attack_surface, evidence = find_attack_surface(as_sigma)
            as_graph = attack_surface_graph(as_sigma, attack_surface)
            if attack_surface == []:
                print(
                    "I could not find any entry points in the system. This does not mean there are not any."
                )
            else:
                nx.write_graphml(as_graph, args.output + "_attack_surface_graph.graphml")
                with open(args.output + '_' + "_attack_surface_evidence.csv", "w") as results_file:
                    writer = csv.writer(results_file)
                    writer.writerow(
                        ('Attack Surface Elements', 'Attack Vector', 'Database',
                         'ID', 'Related CWE', 'Related CAPEC', 'Related CVE',
                         'Contents'))
                    for violated_component in attack_surface:
                        as_element = violated_component[1] + " -> " + violated_component[0]
                        for piece in evidence:
                            if piece[0] == violated_component[0]:
                                try:
                                    writer.writerow(
                                        (as_element, piece[1].name,
                                         piece[1].db_name, piece[1].db_id,
                                         piece[1].related_weakness,
                                         piece[1].related_attack_pattern,
                                         piece[1].related_vulnerability,
                                         piece[1].contents))
                                except:
                                    print("Error [2]")
        else:
            # searches each component's descriptors for vulnerabilities
            print("\n\rFull system analysis")
            print("====================")
            for violated_component in violated_components:
                pprint_component(violated_component[0])
                violated_component[1] = rank_results(violated_component[1])
                for piece in violated_component[1]:
                    if args.abstract:
                        if piece.db_name == "CVE":
                            continue
                        else:
                            pprint_attack_vector(piece)
                    else:
                        pprint_attack_vector(piece[0], str(piece[1]))
            # finds the attack surface
            print("\n\rAttack surface analysis")
            print("=======================")
            # we deepcopy sigma so as not to modify the initial graph object needed
            # for the system topology graph visualization
            as_sigma = copy.deepcopy(sigma)
            attack_surface, evidence = find_attack_surface(as_sigma)
            as_graph = attack_surface_graph(as_sigma, attack_surface)
            if attack_surface == []:
                print(
                    "I could not find any entry points in the system. This does not mean there are not any."
                )
            else:
                for violated_component in attack_surface:
                    pprint_component(violated_component[1] + " -> " + violated_component[0])
                    for piece in evidence:
                        if piece[0] == violated_component[0]:
                            if args.abstract:
                                if piece[1].db_name == "CVE":
                                    continue
                                else:
                                    pprint_attack_vector(piece[1])
                            else:
                                pprint_attack_vector(piece[1])

        # find exploit chains
        if args.target:
            exploit_chains = find_exploit_chains(as_graph, attack_surface,
                                                 violated_components, args.target)
            print("\n\rExploit chain analysis")
            print("======================\n\r")
            chain = ""
            for exploit_chain in exploit_chains:
                first = True
                for element in exploit_chain:
                    if first:
                        chain += str(element)
                        first = False
                    else:
                        chain += " -> " + str(element)
                print(chain)
                chain = ""

        if args.visualize:
            print(sigma)
            plot_system_topology(sigma)
            plot_attack_surface(as_graph, attack_surface)
            if args.target:
                plot_exploit_chains(exploit_chains, as_graph, args.target)
            plt.show()

    if args.update:
        print("Updating MITRE CAPEC\n", flush=True)
        update_capec()
        print("Updated MITRE CAPEC\n", flush=True)
        print("Updating MITRE CWE\n", flush=True)
        update_cwe()
        print("Updated MITRE CWE\n", flush=True)
        print("Updating NVD CVE\n", flush=True)
        update_cve()
        print("Updated NVD CVE\n", flush=True)
        print("Parsing attack vectors\n\r", flush=True)
        attack_vectors = attack_vector_cross()
        print("I found %d attack vectors.\n\r" % len(attack_vectors))
        print("Creating search index, this might take a while\n\r", flush=True)
        create_index(attack_vectors)
        print("Created search index\n\r", flush=True)

def test_get_lines():
    index = search.create_index("test_example_2.txt")
    words = ["the", "republic"]
    assert search.get_lines(words, index) == [261, 282]

def index_sightings():
    create_index()
    return jsonify({
        'message': 'Indexing complete!',
    })