Example #1
def load_and_search():
    with open("ac_trie", "r+b") as bf:
        mm = mmap.mmap(bf.fileno(), 0)
    ac_first = AC.from_buff(mm, copy=False)   # shares the mmap buffer, no copy
    ac_second = AC.from_buff(mm, copy=False)  # shares the mmap buffer, no copy
    ac_third = AC.from_buff(mm, copy=False)   # shares the mmap buffer, no copy
    ac_fourth = AC.from_buff(mm, copy=False)  # shares the mmap buffer, no copy
    print("Matches after loading shared buffer:")
    for id, start, end in ac_first.match(string_to_search):
        print(id, string_to_search[start:end])
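The snippet assumes mmap and cyac.AC are imported, that an automaton was previously saved to "ac_trie", and that string_to_search is defined; a minimal setup under those assumptions (pattern list borrowed from the later examples):

import mmap

from cyac import AC

AC.build(["python", "ruby"]).save("ac_trie")  # create the file the function maps
string_to_search = "asdpythonasdasdruby"
load_and_search()

Example #14 shows the same function in its original context, decorated with @profile.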
Example #2
    def test_buff_ac(self):
        ac = AC.build([u"aİ", u"aaİ", u"aai̇", u"aai̇bİ"], True)
        ac.save("ac.bin")
        with open("ac.bin", "rb") as fi:
            bs = bytearray(fi.read())
        self.assertEqual(len(bs), ac.buff_size())
        bs2 = bytearray(ac.buff_size())
        ac.to_buff(bs2)
        self.assertEqual(bs2, bs)
        self._check_ac_correct(AC.from_buff(bs2, copy=True))
        self._check_ac_correct(AC.from_buff(bs2, copy=False))
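Note the two from_buff modes exercised at the end: copy=True duplicates the serialized bytes into memory owned by the automaton, while copy=False (per the "shares the mmap buffer" comments in Examples #1 and #14) keeps a zero-copy view, so bs2 must stay alive and unmodified for as long as that automaton is used.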
Example #3
    def test_ignore_case(self):
        if sys.version_info.major < 3:
            return
        ac = AC.build([u"aİ", u"aİİ", u"aai̇", u"aai̇bİ"], True)
        arr = [(end_, val) for val, start_, end_ in ac.match(u"aai̇bİa")]
        self.assertEqual(arr, [(4, 2), (4, 0), (6, 3)])

        ac = AC.build([u"aİ", u"aaİ", u"aai̇", u"aai̇bİ"], True)
        self.assertEqual(ac.size, 3)
        arr = [(end_, val) for val, start_, end_ in ac.match(u"aai̇bİa")]
        self.assertEqual(arr, [(4, 1), (4, 0), (6, 2)])
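The two builds differ only in the second pattern, and the assertions show what case folding does: in the second build u"aaİ" and u"aai̇" fold to the same key, so four input patterns yield only three entries (ac.size == 3) and the match ids shift accordingly.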
Example #4
    def test_ignore_case_sep(self):
        if sys.version_info.major < 3:
            return
        ac = AC.build([u"aİ", u"aaİ", u"aai̇", u"aai̇bİ"], True)
        sep = set([ord(" ")])
        arr = [(end_, val) for val, start_, end_ in ac.match(u"aai̇bİ", sep)]
        self.assertEqual(arr, [(6, 2)])
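The sep argument is a set of separator code points: only matches whose boundaries fall on a separator or on an end of the text are reported, which is why just the full-string match (6, 2) survives here. Example #10 exercises the same mechanism on ASCII input.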
Example #5
    def test_pickle_ac(self):
        ac = AC.build([u"aİ", u"aaİ", u"aai̇", u"aai̇bİ"], True)
        with open("ac.pkl", "wb") as fo:
            pickle.dump(ac, fo)
        with open("ac.pkl", "rb") as fi:
            ac = pickle.load(fi)
        self.assertEqual(ac.size, 3)
        arr = [(end_, val) for val, start_, end_ in ac.match(u"aai̇bİa")]
        self.assertEqual(arr, [(4, 1), (4, 0), (6, 2)])
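The assertions are identical to those of the second build in Example #3, confirming that the pickle round trip preserves both the folded pattern count and the match ids.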
Example #6
    def build(self, pages: Iterable[Vertex]):
        key2pages = {}
        for page in pages:
            key = _clean_title(page["title"]).lower()
            key_pages = key2pages.setdefault(key, [])
            key_pages.append(page.index)
        self._ac = AC.build(key2pages.keys(), ignore_case=True)
        for key, id_ in self._ac.items():
            self._id2pages.setdefault(id_, key2pages[key])
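A minimal standalone sketch of the items() iteration this method relies on; per the build() call above, it yields (pattern, id) pairs for every key in the automaton:

from cyac import AC

ac = AC.build([u"python", u"ruby"], ignore_case=True)
for key, id_ in ac.items():  # (pattern, id) pairs, ids as assigned by build
    print(id_, key)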
Example #7
    def test_save(self):
        ac_file = "ac_cyac"
        with open(TestAC.words_file, "r", encoding="utf-8") as f:
            words = list(f)
        ac = AC.build(words)
        ac_to_be_saved_size = ac.buff_size()
        ac.save(ac_file)
        ac_saved_size = Path(ac_file).stat().st_size
        self.assertEqual(ac_to_be_saved_size, ac_saved_size)
        os.remove(ac_file)
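buff_size() therefore reports the exact on-disk size of save()'s output, which is what lets the shared-memory examples below pre-size their buffers and slice exactly ac_size bytes.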
Example #8
def get_shared_memory_find_matches(processname, shared_memory_tag, ac_size):
    shm = shared_memory.SharedMemory(shared_memory_tag)
    AC_in_bytes = shm.buf[0:ac_size]
    ac_in_process = AC.from_buff(AC_in_bytes, copy=False)
    string_to_search = "asdpythonasdasdruby"
    print("Executing search in {}".format(processname))
    for id, start, end in ac_in_process.match(string_to_search):
        print(id, string_to_search[start:end])
    #time.sleep(100)
    ac_in_process = None
    AC_in_bytes.release()  # MUST release the memoryview before closing the shm instance, otherwise an error is raised.
    shm.close()
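For context, a minimal parent-side sketch that could feed this worker; the segment name and pattern list are assumptions, not taken from the original code:

from multiprocessing import Process, shared_memory

from cyac import AC

SHM_TAG = "ac_trie_shm"  # hypothetical segment name; must match the child

ac = AC.build(["python", "ruby"])
size = ac.buff_size()
buff = bytearray(size)
ac.to_buff(buff)  # serialize the automaton into a plain buffer (see Example #2)

shm = shared_memory.SharedMemory(name=SHM_TAG, create=True, size=size)
shm.buf[:size] = buff  # copy the serialized trie into the shared segment

p = Process(target=get_shared_memory_find_matches, args=("worker-1", SHM_TAG, size))
p.start()
p.join()

shm.close()
shm.unlink()  # the creating process removes the segment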
Example #9
import json
import random

from cyac import AC

# Python dictionary with trie patterns
with open("../static_ioc_sample_30k.txt", "r") as f:
    file_words = json.load(f)
words_to_search = list()
trie_words = list()

total_words_to_search = 1000
total_words_added = 0

t = list()
patterns = dict()
total_initial_words = 0
total_iterations = 10
for x in range(0, total_iterations):
    print("In iteration ", x)
    for key in file_words:
        for value in file_words[key]:
            value = value + str(random.randint(10000, 500000))
            if total_words_to_search != total_words_added:
                words_to_search.append(value)
                total_words_added += 1
            if x == 0:
                total_initial_words += 1
            t.append(value)

print(f"Initial words {total_initial_words}")
print(f"Total patterns on AC trie: {total_initial_words*total_iterations+1}")
ac = AC.build(t)
input()  #stop program to measure memory
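The input() pause keeps the process alive so its resident memory, dominated by the freshly built trie, can be inspected with an external tool and compared across pattern counts.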
Example #10
    def test_sep(self):
        ac = AC.build([u"a", u"aa", u"A", u"AA"])
        sep = set([ord(" ")])
        arr = [(end_, val) for val, _, end_ in ac.match(u"a aaa", sep)]
        self.assertEqual(arr, [(1, 0)])
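Only (1, 0) is reported: the leading "a" is delimited by the start of the string and a space, the matches inside "aaa" are not separator-bounded, and "A"/"AA" never match because this build is case sensitive.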
Example #11
    def test_init(self):
        ac = AC.build([u'我', u'我是', u'是中'])
        arr = [(end_, val) for val, start_, end_ in ac.match(u"我是中国人")]
        self.assertEqual(arr, [(1, 0), (2, 1), (3, 2)])
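match() reports overlapping hits: 我 over [0, 1), 我是 over [0, 2), and 是中 over [1, 3) are all returned, ordered by match end.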
Example #12
import json
import mmap
import random
from multiprocessing import Process
from multiprocessing import shared_memory

from cyac import AC


ac_patterns = ["python", "ruby"]
with open("static_ioc_sample.json", "r") as f:
    patterns_dict = json.load(f)
total_patterns = 0
for x in range(0, 1000):
    for key in patterns_dict:
        for value in patterns_dict[key]:
            total_patterns += 1
            ac_patterns.append(value+str(random.randint(0,10000)))
string_to_search = "asdpythonasdasdruby"

# Export to file
ac = AC.build(ac_patterns)
ac.save("ac_trie")
print("Matches before saving:")
for id, start, end in ac.match(string_to_search):
    print(id, string_to_search[start:end])
# Drop references so they can be garbage-collected and are not copied into child processes.
string_to_search = None
ac = None
ac_patterns = None

# Load from file with mmap and share memory
with open("ac_trie", "r+b") as bf:
    mm = mmap.mmap(bf.fileno(), 0)
ac_first = AC.from_buff(mm, copy=False)
ac_size = ac_first.buff_size()
print("Size of AC automaton:Total patterns: {}bytes:{}patterns".format(ac_size, total_patterns))
Example #13
def init_ac(words, size):
    ret = AC.build(words[:size])
    return ret
Example #14
import json
import mmap
import random

from cyac import AC
from memory_profiler import profile

ac_patterns = ["python", "ruby"]
with open("static_ioc_sample.json", "r") as f:
    patterns_dict = json.load(f)
total_patterns = 0
for x in range(0, 1000):
    for key in patterns_dict:
        for value in patterns_dict[key]:
            total_patterns += 1
            ac_patterns.append(value + str(random.randint(0, 10000)))
string_to_search = "asdpythonasdasdruby"

# Export to file
ac = AC.build(ac_patterns)
ac.save("ac_trie")
print("Matches before saving:")
for id, start, end in ac.match(string_to_search):
    print(id, string_to_search[start:end])


# Load from file with mmap and share memory
@profile
def load_and_search():
    with open("ac_trie", "r+b") as bf:
        mm = mmap.mmap(bf.fileno(), 0)
    ac_first = AC.from_buff(mm, copy=False)   # shares the mmap buffer, no copy
    ac_second = AC.from_buff(mm, copy=False)  # shares the mmap buffer, no copy
    ac_third = AC.from_buff(mm, copy=False)   # shares the mmap buffer, no copy
    ac_fourth = AC.from_buff(mm, copy=False)  # shares the mmap buffer, no copy
    print("Matches after loading shared buffer:")
    for id, start, end in ac_first.match(string_to_search):
        print(id, string_to_search[start:end])
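Calling load_and_search() under memory_profiler (for example, python -m memory_profiler script.py) prints a line-by-line memory report, which makes it easy to confirm that the three extra from_buff views add essentially nothing on top of the single shared mmap.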