Python create_indexの例、search.create_index Pythonの例

コード例 #1

0

ファイルを表示

def index_products():
    """View that indexes the products.json file.

    Calls ``create_index()`` and then returns the ``jsonify``-ed
    confirmation message.
    """
    create_index()
    payload = {'message': 'Indexing complete!'}
    return jsonify(payload)

コード例 #2

0

ファイルを表示

ファイル: test.py プロジェクト: HOIg3r/LINFO1101-Intro-a-la-progra

 def test():
     """Smoke-test the helpers on ``s``: every call must return a truthy value.

     The calls are made in the same order as they are listed below.
     """
     for filename in ("text_example_1.txt", "", "text_example_2.txt"):
         assert s.readfile(filename)

     for sentence in ("gun fournaise mauvaise, situation !", ""):
         assert s.get_words(sentence)

     for filename in ("text_example_1.txt", "text_example_2.txt", ""):
         assert s.create_index(filename)

     lookups = (
         ("situation", "text_example_1.txt"),
         ("wallon", "text_example_2.txt"),
         ("", ""),
     )
     for word, filename in lookups:
         assert s.get_lines(word, filename)

コード例 #3

0

ファイルを表示

ファイル: create_es_index.py プロジェクト: delving/nave

    def handle(self, *args, **options):
        """Create a search index named by the first positional argument.

        The optional second positional argument is forwarded as ``aliases``;
        when it is missing, ``None`` is forwarded instead.  A message is
        written to ``self.stdout`` before and after the index is created.
        """
        index_name = args[0]
        alias = None
        if len(args) > 1:
            alias = args[1]
        summary = (index_name, alias)

        self.stdout.write('Started creating index for {} with alias: {}'.format(*summary))

        # Imported only once the start message has been written.
        from search import create_index
        create_index(index_name=index_name, aliases=alias)

        self.stdout.write('Finished creating index for {} with alias: {}'.format(*summary))

コード例 #4

0

ファイルを表示

ファイル: update_memopol_index.py プロジェクト: Bouska/memopol2

 def handle_noargs(self, **options):
     """Delete any existing index directory and rebuild the search index.

     Removes ``settings.WHOOSH_INDEX`` when it is a directory, calls
     ``create_index()``, and then calls
     ``update_index(None, obj, created=False)`` for every object of every
     class listed in ``Searchables.items``, printing the class name first.
     """
     # NOTE(review): the three imports below all bind the same local name
     # ``models``, which is never used afterwards; presumably they exist only
     # so the model modules get loaded before indexing -- confirm.
     from meps import models
     from mps import models
     from reps import models
     from search import create_index
     from search import Searchables
     from search import update_index
     import shutil
     if os.path.isdir(settings.WHOOSH_INDEX):
         shutil.rmtree(settings.WHOOSH_INDEX)
     create_index()
     for klass in Searchables.items:
         # A parenthesised single-argument print produces the same output on
         # Python 2 and is valid syntax on Python 3.
         print('indexing %s' % klass.__name__)
         for i in klass.objects.all():
             update_index(None, i, created=False)

コード例 #5

0

ファイルを表示

    def rebuild_index(self, dbdir):
        """Convert and index data files for random access, then index the
        movie list for searching.

        Args:
            dbdir: Forwarded to every parser and to ``search.create_index``.

        Raises:
            Exception: If ``self.dbfile`` already exists.
        """
        # Import and index data files
        if os.path.exists(self.dbfile):
            raise Exception('%s exists' % self.dbfile)
        # The parser name is not used in this loop.
        for _parsername, parser in parsers.parsers():
            obj = parser(dbfile=self.dbfile, dbdir=dbdir, debug=self.debug)
            with Timer(indent=2, quiet=not self.debug):
                obj.rebuild_index(do_copy=True)

        # Create index of movie titles
        if self.debug:
            # A parenthesised single-argument print produces the same output
            # on Python 2 and is valid syntax on Python 3.
            print("1 Creating search index...")
        with Timer(indent=2, quiet=not self.debug):
            search.create_index(self.dbfile, dbdir, debug=self.debug)

コード例 #6

0

ファイルを表示

ファイル: __init__.py プロジェクト: Madhumithas/python-imdb

    def rebuild_index(self, dbdir):
        """Convert and index data files for random access, then index the
        movie list for searching.

        Args:
            dbdir: Forwarded to every parser and to ``search.create_index``.

        Raises:
            Exception: If ``self.dbfile`` already exists.
        """
        # Import and index data files
        if os.path.exists(self.dbfile):
            raise Exception('%s exists' % self.dbfile)
        for parsername, parser in parsers.parsers():
            obj = parser(dbfile=self.dbfile, dbdir=dbdir, debug=self.debug)
            if self.debug:
                # A parenthesised single-argument print produces the same
                # output on Python 2 and is valid syntax on Python 3.
                print("Indexing %s..." % parsername)
            with Timer(indent=2, quiet=not self.debug):
                obj.rebuild_index(do_copy=True)

        # Create index of movie titles
        if self.debug:
            print("Creating search index...")
        with Timer(indent=2, quiet=not self.debug):
            search.create_index(self.dbfile, dbdir, debug=self.debug)

コード例 #7

0

ファイルを表示

def setup():
    """Build the search index, then register the API resources.

    The object returned by ``search.create_index`` is handed to
    ``SearchResource`` through ``resource_class_args``.
    """
    print('creating index')
    transcript_index = search.create_index('Server/calvin_transcript.txt', 5)
    print('finished creating index, now registering api resources')

    # date format: YYYY-MM-DD
    api.add_resource(ComicResource, '/comic/<string:date>')
    api.add_resource(
        SearchResource,
        '/search/<string:query>',
        resource_class_args=(transcript_index,),
    )

コード例 #8

0

ファイルを表示

ファイル: server.py プロジェクト: docileninja/Calvin-and-Hobbes-Viewer

def setup():
  """Build the search index, then register the API resources.

  The object returned by ``search.create_index`` is handed to
  ``SearchResource`` through ``resource_class_args``.
  """
  print('creating index')
  transcript_index = search.create_index('Server/calvin_transcript.txt', 5)
  print('finished creating index, now registering api resources')

  # date format: YYYY-MM-DD
  api.add_resource(ComicResource, '/comic/<string:date>')
  api.add_resource(
      SearchResource,
      '/search/<string:query>',
      resource_class_args=(transcript_index,),
  )

コード例 #9

0

ファイルを表示

def main():
    """Fetch the recipe links from the listing page and store each parsed recipe.

    Links come from ``sd.parse_website``.  For every link the page is parsed
    with ``sd.parse`` after a two-second pause, and the result is stored via
    ``se.store_record`` under ``'recipes'`` whenever a connection object was
    obtained and ``se.create_index`` returns a truthy value.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
        'Pragma': 'no-cache'
    }
    url = 'https://www.allrecipes.com/recipes/96/salad/'
    links = sd.parse_website(url, headers)

    # Nothing to do without links; this also guarantees ``es`` is bound
    # before the loop below ever runs.
    if not links:
        return

    es = se.connect_elasticsearch()

    for link in links:
        sleep(2)  # pause between requests
        result = sd.parse(link['href'], headers)
        if es is not None and se.create_index(es, 'recipes'):
            se.store_record(es, 'recipes', result)
            print('Data indexed successfully')

コード例 #10

0

ファイルを表示

ファイル: main.py プロジェクト: Johnetordoff/get_blame

def reset_index():
    """Call ``search.delete_index()`` followed by ``search.create_index()``."""
    search.delete_index()
    search.create_index()

コード例 #11

0

ファイルを表示

ファイル: generate_db.py プロジェクト: njbbaer/give-get-green

    return postings


def write_database(postings):
    """Insert every posting into the ``posting_posting`` table.

    Args:
        postings: Iterable of mappings, each providing the keys ``name``,
            ``email``, ``phone``, ``address``, ``category``, ``description``
            and ``title``.

    The rows are written to the SQLite database file ``givegetgreen_db`` in
    a single transaction, and the number of rows written is printed.
    """
    rows = [
        (post['name'], post['email'], post['phone'], post['address'],
         post['category'], post['description'], post['title'])
        for post in postings
    ]
    conn = sqlite3.connect('givegetgreen_db')
    try:
        # Bound parameters instead of string interpolation: values containing
        # quotes no longer break the statement or allow SQL injection.
        conn.executemany(
            '''INSERT INTO posting_posting (
                id, name, email, phone, address, category, description, title
            ) VALUES (null, ?, ?, ?, ?, ?, ?, ?)''',
            rows)
        conn.commit()
    finally:
        # Release the connection even when an insert fails.
        conn.close()
    print(str(len(rows)) + " postings written to database")


# Script entry point: generate NUM_GENERATE postings, write them to the
# database, then call search.create_index (presumably with the index
# directory and the database file name -- confirm against search).
if __name__ == '__main__':

    postings = generate_postings(NUM_GENERATE)
    write_database(postings)
    search.create_index("indexdir", "givegetgreen_db")

コード例 #12

0

ファイルを表示


if __name__ == '__main__':
    # index: the inverted index dictionary
    # num_doc: number of documents
    # num_uniq: number of entries in the index
    index = num_doc = num_uniq = None

    if fileIO.index_file_exists():
        # A prebuilt index is available: read it instead of rebuilding.
        print('Reading index from file...')
        index = fileIO.read_index_from_file()
        num_doc = fileIO.read_num_doc_from_file()
        num_uniq = len(index)
    else:
        # Create a new index structure from scratch (takes approx. 20 minutes).
        print('Creating index...')
        index, num_doc = search.create_index()
        num_uniq = len(index)

        # idf values are only calculated right after index creation; an index
        # read back from file may already contain them.
        for term, postings in index.items():
            if type(postings) is float:
                print(postings)
            doc_freq = len(postings)
            # Guard against dividing by zero for a term without postings.
            idf = math.log(num_doc / doc_freq) if doc_freq != 0 else 0

            # The idf is stored, as a string, inside the term's own
            # subdictionary under the key 'idf'.
            postings['idf'] = str(idf)
        fileIO.write_index_to_file(index)

コード例 #13

0

ファイルを表示

from search import remove_index, create_index

from init_app import app
# Within the application context, call remove_index("bill") and then
# create_index().
with app.app_context():
    remove_index("bill")
    create_index()

コード例 #14

0

ファイルを表示

def main(args):
    """Parse the command-line arguments and run the selected commands.

    Args:
        args: Sequence of argument strings handed to ``argparse``.

    ``--search``, ``--input`` and ``--update`` are independent and are
    processed in that order when several are given.  ``--output`` switches
    ``--search`` and ``--input`` from printing results to writing CSV and
    GraphML files.  ``--abstract``, ``--target`` and ``--visualize`` are only
    consulted by ``--input`` when ``--output`` is absent.  ``--rank`` is
    accepted but not consulted in this function.
    """
    parser = argparse.ArgumentParser(
        description="""CYBOK: The Cyber Security Body of Knowledge
                       for Model-Based Vulnerability Assessment""")

    parser.add_argument("-s",
                        "--search",
                        help="search attack vectors from database")
    parser.add_argument(
        "-r",
        "--rank",
        help="ranks results based on how many times they match",
        action="store_true")
    parser.add_argument("-a",
                        "--abstract",
                        help="abstracts from CVE to CWE and CAPEC",
                        action="store_true")
    parser.add_argument("-i",
                        "--input",
                        help="inputs system model from graphml file")
    parser.add_argument("-u",
                        "--update",
                        help="updates CAPEC, CWE, CVE data files",
                        action="store_true")
    parser.add_argument("-v",
                        "--visualize",
                        help="plots the graph of attack vectors",
                        action="store_true")
    parser.add_argument(
        "-t",
        "--target",
        help="inputs target to find exploit chains based on evidence")
    parser.add_argument(
        "-o",
        "--output",
        help="outputs the results of -s or -i to csv and graphml files")

    args = parser.parse_args(args)

    if args.search:
        matched_attack_vectors = search(args.search)

        ranked_attack_vectors = rank_results(matched_attack_vectors)

        if args.output:
            # newline="" lets the csv module control line endings itself, as
            # its documentation requires for files passed to csv.writer.
            with open(args.output + '_' + args.search + ".csv",
                      "w", newline="") as results_file:
                writer = csv.writer(results_file)
                writer.writerow(
                    ('Hits', 'Attack Vector', 'Database', 'Related CWE',
                     'Related CAPEC', 'Related CVE', 'Contents'))

                for ranked_attack_vector in ranked_attack_vectors:
                    writer.writerow(
                        (ranked_attack_vector[1], ranked_attack_vector[0].name,
                         ranked_attack_vector[0].db_name,
                         ranked_attack_vector[0].related_weakness,
                         ranked_attack_vector[0].related_attack_pattern,
                         ranked_attack_vector[0].related_vulnerability,
                         ranked_attack_vector[0].contents))
        else:
            for ranked_attack_vector in ranked_attack_vectors:
                pprint_attack_vector(ranked_attack_vector[0],
                                     str(ranked_attack_vector[1]))

    if args.input:
        sigma = system_graph(args.input)

        violated_components = find_violated_components(sigma)

        if args.output:
            nx.write_graphml(sigma, args.output + "_system_topology.graphml")
            av_graph = attack_vector_graph(violated_components)
            nx.write_graphml(av_graph,
                             args.output + "_attack_vector_graph.graphml")

            with open(args.output + '_' + "full_analysis.csv",
                      "w", newline="") as results_file:
                writer = csv.writer(results_file)
                writer.writerow(
                    ('Violated Component', 'Hits for Component',
                     'Attack Vector', 'Database', 'ID', 'Related CWE',
                     'Related CAPEC', 'Related CVE', 'Contents'))

                for violated_component in violated_components:
                    violated_component[1] = rank_results(violated_component[1])
                    for piece in violated_component[1]:
                        # Best effort: a row that cannot be written is
                        # reported and skipped.  ``except Exception`` (rather
                        # than a bare ``except``) lets KeyboardInterrupt and
                        # SystemExit propagate.
                        try:
                            writer.writerow(
                                (violated_component[0], piece[1],
                                 piece[0].name, piece[0].db_name,
                                 piece[0].db_id, piece[0].related_weakness,
                                 piece[0].related_attack_pattern,
                                 piece[0].related_vulnerability,
                                 piece[0].contents))
                        except Exception:
                            print("Error [1]")

            as_sigma = copy.deepcopy(sigma)
            attack_surface, evidence = find_attack_surface(as_sigma)
            as_graph = attack_surface_graph(as_sigma, attack_surface)

            if attack_surface == []:
                print(
                    "I could not find any entry points in the system. This does not mean there are not any."
                )
            else:
                nx.write_graphml(as_graph,
                                 args.output + "_attack_surface_graph.graphml")

                with open(args.output + '_' + "_attack_surface_evidence.csv",
                          "w", newline="") as results_file:
                    writer = csv.writer(results_file)
                    writer.writerow(
                        ('Attack Surface Elements', 'Attack Vector',
                         'Database', 'ID', 'Related CWE', 'Related CAPEC',
                         'Related CVE', 'Contents'))
                    for violated_component in attack_surface:
                        as_element = violated_component[
                            1] + " -> " + violated_component[0]
                        for piece in evidence:
                            if piece[0] == violated_component[0]:
                                # Same best-effort policy as "Error [1]".
                                try:
                                    writer.writerow(
                                        (as_element, piece[1].name,
                                         piece[1].db_name, piece[1].db_id,
                                         piece[1].related_weakness,
                                         piece[1].related_attack_pattern,
                                         piece[1].related_vulnerability,
                                         piece[1].contents))
                                except Exception:
                                    print("Error [2]")
        else:
            # searches each component's descriptors for vulnerabilities
            print("\n\rFull system analysis")
            print("====================")

            for violated_component in violated_components:
                pprint_component(violated_component[0])
                violated_component[1] = rank_results(violated_component[1])
                for piece in violated_component[1]:
                    # Each ranked piece is indexed as (attack vector, hits)
                    # elsewhere in this loop, so the vector is piece[0].
                    if args.abstract:
                        if piece[0].db_name == "CVE":
                            continue
                        pprint_attack_vector(piece[0])
                    else:
                        pprint_attack_vector(piece[0], str(piece[1]))

            # finds the attack surface
            print("\n\rAttack surface analysis")
            print("=======================")

            # we deepcopy sigma so as not to modify the initial graph object
            # needed for the system topology graph visualization
            as_sigma = copy.deepcopy(sigma)

            attack_surface, evidence = find_attack_surface(as_sigma)
            as_graph = attack_surface_graph(as_sigma, attack_surface)

            if attack_surface == []:
                print(
                    "I could not find any entry points in the system. This does not mean there are not any."
                )
            else:
                for violated_component in attack_surface:
                    pprint_component(violated_component[1] + " -> " +
                                     violated_component[0])
                    for piece in evidence:
                        if piece[0] == violated_component[0]:
                            if args.abstract:
                                if piece[1].db_name == "CVE":
                                    continue
                                pprint_attack_vector(piece[1])
                            else:
                                pprint_attack_vector(piece[1])

            # find exploit chains
            if args.target:
                exploit_chains = find_exploit_chains(as_graph, attack_surface,
                                                     violated_components,
                                                     args.target)
                print("\n\rExploit chain analysis")
                print("======================\n\r")

                # One output line per chain, elements joined by " -> ".
                for exploit_chain in exploit_chains:
                    print(" -> ".join(
                        str(element) for element in exploit_chain))

            if args.visualize:
                print(sigma)
                plot_system_topology(sigma)
                plot_attack_surface(as_graph, attack_surface)
                if args.target:
                    plot_exploit_chains(exploit_chains, as_graph, args.target)
                plt.show()

    if args.update:
        print("Updating MITRE CAPEC\n", flush=True)
        update_capec()
        print("Updated MITRE CAPEC\n", flush=True)

        print("Updating MITRE CWE\n", flush=True)
        update_cwe()
        print("Updated MITRE CWE\n", flush=True)

        print("Updating NVD CVE\n", flush=True)
        update_cve()
        print("Updated NVD CVE\n", flush=True)

        print("Parsing attack vectors\n\r", flush=True)
        attack_vectors = attack_vector_cross()
        print("I found %d attack vectors.\n\r" % len(attack_vectors))

        print("Creating search index, this might take a while\n\r", flush=True)
        create_index(attack_vectors)
        print("Created search index\n\r", flush=True)

コード例 #15

0

ファイルを表示

def test_get_lines():
    """Check ``search.get_lines`` against an index built from test_example_2.txt."""
    expected = [261, 282]
    built_index = search.create_index("test_example_2.txt")
    assert search.get_lines(["the", "republic"], built_index) == expected

コード例 #16

0

ファイルを表示

def index_sightings():
    """Call ``create_index()`` and return the ``jsonify``-ed confirmation message."""
    create_index()
    payload = {'message': 'Indexing complete!'}
    return jsonify(payload)