Beispiel #1
0
    def test_32bit_basic_string( self ):
        """Compare ``pymmh3.hash`` of every line of pg1260.txt with stored values.

        The expected values are loaded from ``solution_hash32_seed0.txt`` by
        ``self._load_solutions`` and looked up by the raw line as read from
        the file in binary mode. The hash is called without an explicit seed.
        """
        expected_by_line = self._load_solutions('solution_hash32_seed0.txt')
        corpus_path = os.path.join( file_dir, 'pg1260.txt' )

        with open( corpus_path, 'rb' ) as corpus:
            for line in corpus.readlines():
                expected = expected_by_line[line]
                actual = pymmh3.hash( line )
                failure_note = 'different hash for line: "%s"\n0x%08X != 0x%08X' % ( line, expected, actual )
                self.assertEqual( expected, actual, failure_note )
Beispiel #2
0
    def test_32bit_custom_seed_bytearray( self ):
        """Compare seeded ``pymmh3.hash`` of bytearray lines with stored values.

        Every line of pg1260.txt (read in binary mode) is wrapped in a
        ``bytearray`` and hashed with seed ``0x1234ABCD``. The expected values
        come from ``solution_hash32_seed1234ABCD.txt`` and are looked up by
        the original, unwrapped line.
        """
        expected_by_line = self._load_solutions('solution_hash32_seed1234ABCD.txt')
        corpus_path = os.path.join( file_dir, 'pg1260.txt' )

        with open( corpus_path, 'rb' ) as corpus:
            for line in corpus.readlines():
                expected = expected_by_line[line]
                actual = pymmh3.hash( bytearray( line ), seed = 0x1234ABCD )
                failure_note = 'different hash for line: "%s"\n0x%08X != 0x%08X' % ( line, expected, actual )
                self.assertEqual( expected, actual, failure_note )
Beispiel #3
0
    def test_32bit_custom_seed_bytearray( self ):
        """Compare seeded ``pymmh3.hash`` of bytearray lines with stored values.

        Every line of pg1260.txt (read in binary mode) is wrapped in a
        ``bytearray`` and hashed with seed ``0x1234ABCD``. The expected values
        come from ``solution_hash32_seed1234ABCD.txt`` and are looked up by
        the original, unwrapped line.
        """
        # NOTE(review): the second argument (10) is passed straight to
        # _load_solutions; presumably the numeric base of the stored values -
        # confirm against that helper.
        expected_by_line = self._load_solutions('solution_hash32_seed1234ABCD.txt', 10)
        corpus_path = os.path.join( file_dir, 'pg1260.txt' )

        with open( corpus_path, 'rb' ) as corpus:
            for line in corpus.readlines():
                expected = expected_by_line[line]
                actual = pymmh3.hash( bytearray( line ), seed = 0x1234ABCD )
                failure_note = 'different hash for line: "%s"\n0x%08X != 0x%08X' % ( line, expected, actual )
                self.assertEqual( expected, actual, failure_note )
Beispiel #4
0
 def add_server(self, key):
     """Register the server *key* at the slot ``pymmh3.hash(key) % self.size``.

     Does nothing when *key* is already listed in ``self.server_keys``.
     Otherwise the slot is recorded in ``self.server_keys_dict``,
     ``self.nodes`` and ``self.server_node_loc``, and the location list is
     re-sorted.
     """
     if key in self.server_keys:
         return

     slot = pymmh3.hash(key) % self.size
     self.server_keys_dict[key] = slot
     self.server_keys.append(key)
     # An existing entry at this slot is replaced by the new key.
     self.nodes[slot] = key
     self.server_node_loc.append(slot)
     self.server_node_loc.sort()
Beispiel #5
0
def get_x(csv_row, D):
    """Hash the ``key=value`` pairs of a row into a sparse feature dict.

    Each pair is joined as ``key + '=' + value`` (both must be strings) and
    mapped to the index ``hash(pair) % D``.

    :param csv_row: mapping of column name to string value
    :param D: modulus applied to every hash, i.e. the number of buckets
    :return: dict mapping each index to its accumulated value. With the
        module-level flag ``signed`` false, the value is the number of pairs
        that landed on the index; with it true, each occurrence adds +1 or -1
        depending on the parity of ``hash(str(index))``.
    """
    fullind = [hash(key + '=' + value) % D for key, value in csv_row.items()]

    x = {}
    for index in fullind:
        if signed:
            # Sign is derived from a second hash of the index itself.
            step = 1 if (hash(str(index)) % 2) == 1 else -1
        else:
            step = 1
        # dict.get replaces the Python-2-only dict.has_key check.
        x[index] = x.get(index, 0) + step

    return x  # x contains indices of features that have a value as number of occurrences
Beispiel #6
0
    def test_32bit_basic_string( self ):
        """Compare ``pymmh3.hash`` of every line of pg1260.txt with stored values.

        The expected values are loaded from ``solution_hash32_seed0.txt`` by
        ``self._load_solutions`` and looked up by the raw line as read from
        the file in binary mode. The hash is called without an explicit seed.
        """
        # NOTE(review): the second argument (10) is passed straight to
        # _load_solutions; presumably the numeric base of the stored values -
        # confirm against that helper.
        expected_by_line = self._load_solutions('solution_hash32_seed0.txt', 10)
        corpus_path = os.path.join( file_dir, 'pg1260.txt' )

        with open( corpus_path, 'rb' ) as corpus:
            for line in corpus.readlines():
                expected = expected_by_line[line]
                actual = pymmh3.hash( line )
                failure_note = 'different hash for line: "%s"\n0x%08X != 0x%08X' % ( line, expected, actual )
                self.assertEqual( expected, actual, failure_note )
Beispiel #7
0
 def lookup(self, string):
     """Return the smallest stored value for *string* across the hashed rows.

     *string* is converted with ``str()`` and encoded to bytes. For every
     seed in ``range(1, self.hash_count)`` the cell
     ``self.arrays[seed][mmh3.hash(encoded, seed) % self.size]`` is read, and
     the minimum of those cells is returned. Row 0 of ``self.arrays`` is
     never read because the seeds start at 1.
     """
     encoded = str(string).encode()
     return min(
         self.arrays[seed][mmh3.hash(encoded, seed) % self.size]
         for seed in range(1, self.hash_count)
     )
Beispiel #8
0
 def _get_hash_values(self, element: str, range_size: int) -> List[int]:
     """
     Compute one cell index per hash function for the given element.

     :param element: The element to map
     :param range_size: Modulus applied to every hash value
     :return: A list of cell indexes, one for each seed in
         ``range(self.num_of_hash_functions)``, in seed order
     """
     seeds = range(self.num_of_hash_functions)
     return [mmh3.hash(element, seed) % range_size for seed in seeds]
Beispiel #9
0
def get_request(url):
    """Fetch *url*, hash its base64-encoded body and file the URL under that hash.

    The request goes through the module-level session ``s`` with certificate
    verification disabled and ``max_time`` as the timeout. The URL is appended
    to the list stored under the hash in the module-level mapping ``a``.

    :return: ``(url, hash, None)`` on success, or ``(url, None, exception)``
        when anything in the fetch/hash/record sequence raises.
    """
    try:
        response = s.get(url, verify=False, timeout=max_time)
        encoded_body = codecs.encode(response.content, "base64")
        favicon_hash = mmh3.hash(encoded_body)
        a.setdefault(favicon_hash, [])
        a[favicon_hash].append(url)
    except Exception as e:
        return url, None, e

    return url, favicon_hash, None
Beispiel #10
0
def getfaviconhash(url):
    """Return the ``pymmh3`` hash of the favicon served at *url*, or ``None``.

    The body is hashed only when the response's ``Content-Type`` header is
    exactly ``image/x-icon``; it is base64-encoded and passed through
    ``change_format`` first. Any exception (including a missing header) is
    reported with a fixed message and results in ``None``.
    """
    favicon_hash = None
    try:
        response = requests.get(url, verify=False)
        if response.headers['Content-Type'] == "image/x-icon":
            encoded = base64.b64encode(response.content).decode('utf-8')
            favicon_hash = pymmh3.hash(change_format(encoded))
    except Exception:
        print("[!] Request Error")
        favicon_hash = None
    return favicon_hash
Beispiel #11
0
def getfaviconhash(url):
    """Return the ``pymmh3`` hash of the image served at *url*, or ``None``.

    The request is made through the module-level session ``s`` with
    certificate verification disabled. The body is hashed only when the
    ``Content-Type`` header contains ``image``; it is base64-encoded and
    passed through ``change_format`` first. The response is closed on the
    non-raising path. Any exception is printed and results in ``None``.
    """
    favicon_hash = None
    try:
        resp = s.get(url, verify=False)
        content_type = resp.headers['Content-Type']
        if "image" in content_type:
            encoded = base64.b64encode(resp.content).decode('utf-8')
            favicon_hash = pymmh3.hash(change_format(encoded))
        resp.close()
    except Exception as ex:
        print("[!] Request Error" + "\n" + str(ex))
        favicon_hash = None
    return favicon_hash
Beispiel #12
0
 def set_server_nodes(self):
     """Place ``self.virtual_nodes`` entries for every server key.

     For virtual node ``i`` of a key, the candidate slot is
     ``pymmh3.hash(key, i + probe) % self.size`` with ``probe`` starting at 0
     and increasing by one until a slot not yet present in ``self.nodes`` is
     found. The chosen slot is stored in ``self.nodes`` and appended to
     ``self.server_node_loc``; ``self.server_keys_dict[key]`` ends up holding
     the slot of the key's last virtual node. The location list is sorted
     once at the end.
     """
     for key in self.server_keys:
         for replica in range(self.virtual_nodes):
             probe = 0
             node_index = pymmh3.hash(key, replica) % self.size
             # Re-hash with the next seed while the slot is already taken.
             while node_index in self.nodes:
                 probe += 1
                 node_index = pymmh3.hash(key, replica + probe) % self.size
             self.nodes[node_index] = key
             self.server_node_loc.append(node_index)
             self.server_keys_dict[key] = node_index
     self.server_node_loc.sort()
Beispiel #13
0
 def get_node(self, key):
     """Print which server location serves *key* and where it is replicated.

     The key is mapped to ``pymmh3.hash(key) % self.size``. The first entry of
     ``self.server_node_loc`` that is greater than or equal to that index is
     reported as the serving location, and the entry after it (wrapping to
     the first entry) as the replica location. When the index is larger than
     every entry, the first entry serves it and the second entry holds the
     replica.

     With a single server location the replica is reported on that same
     location in both cases; previously the wrap-around case raised
     ``IndexError``.

     :raises IndexError: if ``self.server_node_loc`` is empty.
     """
     node_index = pymmh3.hash(key) % self.size
     ring = self.server_node_loc
     # Walk the server locations in list order, e.g.
     # [5, 14, 15, 23, 26, 38, 51, 53, 66, 81, 86, 90, 95, 98]
     for idx, node in enumerate(ring):
         if node_index <= node:
             print("1 Data sharded at node_index " + str(node_index) + " serviced by server at node_index: " + str(node) + "\n")
             if idx == len(ring) - 1:
                 print("1.1 Data replicated at node_index " + str(ring[0]) + "\n")
             else:
                 print("1.2 Data replicated at node_index " + str(ring[idx + 1]) + "\n")
             return
     # not in between, so must be on the first
     print("2 Data sharded at node_index " + str(node_index) + " serviced by server at node_index " + str(ring[0]) + "\n")
     # With one location the modulo falls back to index 0 instead of raising.
     replica = ring[1 % len(ring)]
     print("2.1 Data replicated at server at node_index " + str(replica) + "\n")
Beispiel #14
0
def get_x(csv_row, D):
    """Hash a row into a sparse feature dict with a bias term and optional interactions.

    Each ``key=value`` pair (both must be strings) is mapped to the index
    ``hash(key + '=' + value) % D``. When the module-level flag
    ``interaction`` equals ``True``, the XOR of every unordered pair of those
    indices is added as an extra index.

    :param csv_row: mapping of column name to string value
    :param D: modulus applied to every pair hash
    :return: dict mapping index to accumulated value. Index 0 starts at 1 as
        the bias term. With the module-level flag ``signed`` false, each
        occurrence adds 1; with it true, each occurrence adds +1 or -1
        depending on the parity of ``hash(str(index))``.
    """
    fullind = [hash(key + '=' + value) % D for key, value in csv_row.items()]

    # Explicit comparison kept so non-bool flag values behave as before.
    if interaction == True:
        count = len(fullind)
        # Creating interactions using XOR of every pair (i < j)
        pair_indices = [
            fullind[i] ^ fullind[j]
            for i in range(count)
            for j in range(i + 1, count)
        ]
        fullind = fullind + pair_indices

    x = {0: 1}  # 0 is the index of the bias term
    for index in fullind:
        if signed:
            step = 1 if (hash(str(index)) % 2) == 1 else -1
        else:
            step = 1
        # dict.get replaces the Python-2-only dict.has_key check.
        x[index] = x.get(index, 0) + step

    return x  # x contains indices of features that have a value as number of occurrences
Beispiel #15
0
def Mapping1(clientvalue,cohort,num_hashes,bfsize, allunique):
    """Return ``num_hashes`` distinct indices for a client value within a cohort.

    The hashed input is ``str(cohort) + clientvalue``. Seeds 0, 1, 2, ... are
    tried in order; each yields ``mmh3.hash(input, seed) % bfsize``, and an
    index is kept only the first time it appears.

    :param clientvalue: string value to map
    :param cohort: cohort identifier, prepended after ``str()`` conversion
    :param num_hashes: number of distinct indices to collect
    :param bfsize: modulus applied to every hash
    :param allunique: unused by this function
    :return: list of distinct indices in the order they were first produced
    :raises ValueError: if ``num_hashes`` exceeds ``bfsize``; that many
        distinct indices cannot exist and the search would never finish.
    """
    if num_hashes > bfsize:
        raise ValueError(
            "num_hashes (%r) cannot exceed bfsize (%r)" % (num_hashes, bfsize)
        )

    inptomd = str(cohort) + clientvalue
    listofindx = []
    seen = set()  # O(1) duplicate check; the list keeps first-seen order

    seed = 0
    while len(listofindx) < num_hashes:
        digest = mmh3.hash(inptomd, seed) % bfsize
        if digest not in seen:
            seen.add(digest)
            listofindx.append(digest)
        seed += 1

    return listofindx
Beispiel #16
0
def ShadonFaviconHash():

    arg = request.form.get('faviconUrl', type=str, default=None)

    if arg:
        response = requests.get(arg)
        if response.headers['Content-Type'] == "image/x-icon":
            favicon = base64.b64encode(response.content).decode('utf-8')
            hash = pymmh3.hash(change_format(favicon))
            result = '''
                其结果为:{}
            '''.format(hash)
            return render_template_string(result)

    else:
        return '''
Beispiel #17
0
def InsertStructuredInfo(listofSources):
    """Derive structured features for stored articles and save them as NewsArticleTM rows.

    For every source name in ``listofSources`` and every hash prefix in
    ``range(3)``, the ``NewsArticle`` rows with that source name and prefix
    whose ``state`` is ``"content"`` are processed one by one. For each
    article the function:

    * parses ``article.updated`` with the format ``'%Y-%m-%d %H:%M:%S'`` and
      builds a ``"<month>-<year>"`` string from it,
    * calls ``constructNGramsBOW`` on the content and on the title,
      ``doc2vec`` on the link and content, ``CentroidOfDocument`` on the
      content n-grams together with the module-level ``EmdWords``, and
      ``keywords`` on the content,
    * creates a ``NewsArticleTM`` row from those results,
    * updates the matching ``NewsArticle`` row(s) to state ``"structinfo"``.

    An exception raised while handling one article is printed and the loop
    continues with the next article.

    :param listofSources: iterable of source names used as the ``sourcename``
        filter value
    """
    for listofObjects in listofSources:
        for num in range(3):
            q = NewsArticle.objects.filter(sourcename=listofObjects,
                                           hashprefix=num).limit(None)

            # Only articles still in the "content" state are processed.
            Articles = q.filter(state="content").limit(None)
            print(listofObjects, num, len(Articles))
            for a in range(len(Articles)):
                article = Articles[a]
                try:
                    #QueryForArticle = QueryForEachArticle(article.articlelink,EmdWords)
                    datestring = article.updated
                    dt = datetime.strptime(datestring, '%Y-%m-%d %H:%M:%S')
                    # Month is not zero-padded, e.g. "3-2019".
                    date = str(dt.month) + "-" + str(dt.year)
                    BOWnGrams = constructNGramsBOW(article.content)
                    BOWnGramsTitle = constructNGramsBOW(article.title)
                    #ner = NER(article.content)
                    # The NER call above is disabled, so an empty string is stored.
                    ner = ""
                    d2v = doc2vec(article.articlelink, article.content)
                    centroidOfDocument = CentroidOfDocument(
                        BOWnGrams, EmdWords)
                    key_words = keywords(article.content)
                    #NewsArticleTM.create(sourcename=listofObjects,hashprefix=num,date=date,articlelink=article.articlelink,language="EN",key_words = key_words,bag_of_words=BOWnGrams,bag_of_words_title=BOWnGramsTitle,document_vector =d2v,centroid_of_document=centroidOfDocument)
                    #CentroidOfDocument = list(CentroidOfDocument)#.tolist()
                    #d2v = list(d2v)
                    # The new row's hashprefix is recomputed from the article
                    # link rather than taken from the loop variable `num`.
                    NewsArticleTM.create(
                        sourcename=listofObjects,
                        hashprefix=(pymmh3.hash(article.articlelink) % 3),
                        date=date,
                        articlelink=article.articlelink,
                        language="EN",
                        key_words=key_words,
                        named_entities=ner,
                        bag_of_words=BOWnGrams,
                        bag_of_words_title=BOWnGramsTitle,
                        document_vector=d2v,
                        centroid_of_document=centroidOfDocument)
                    # Mark the source article as processed; this runs only
                    # when the create call above did not raise.
                    NewsArticle.objects(
                        sourcename=article.sourcename,
                        hashprefix=article.hashprefix,
                        articlelink=article.articlelink,
                        language="EN").update(state="structinfo")
                except Exception as ex:
                    # Best effort: report the failure and move on to the next article.
                    print("in exception", ex)
                    pass
Beispiel #18
0
 def save_link(cls, title, url, body="", tags=None, clicks=0, unread=True):
     """Store a link as a ``LinkModel`` entity and add its search document.

     The URL is normalised with ``norm``; the entity key is built from
     ``mmh3.hash`` of the normalised URL. The stored domain is the URL's
     network location with a leading ``www.`` removed. After the entity is
     put, a document is built with ``cls._buildDoc`` and passed to
     ``cls.add``.

     :param title: link title
     :param url: link URL (normalised before use)
     :param body: optional body text
     :param tags: optional list of tags; ``None`` means a new empty list, so
         no list object is shared between calls
     :param clicks: initial click count
     :param unread: initial unread flag
     :return: ``cls(doc, link)``
     """
     if tags is None:
         tags = []

     url = norm(url)
     link_id = mmh3.hash(url)
     key = ndb.Key(LinkModel, link_id)
     domain = urlparse(url).netloc
     if len(domain)>4 and domain.startswith('www.'):
         domain = domain[4:]
     link = LinkModel( key = key,
                       title = title,
                       url = url,
                       domain = domain,
                       body = body,
                       tags = tags,
                       clicks = clicks,
                       unread = unread )
     link.put()
     doc_id = str(link.id)
     doc = cls._buildDoc(doc_id, title, body, domain, tags)
     cls.add(doc)
     return cls(doc, link)
Beispiel #19
0
 def save_link(cls, title, url, body="", tags=None, clicks=0, unread=True):
     """Store a link as a ``LinkModel`` entity and add its search document.

     The URL is normalised with ``norm``; the entity key is built from
     ``mmh3.hash`` of the normalised URL. The stored domain is the URL's
     network location with a leading ``www.`` removed. After the entity is
     put, a document is built with ``cls._buildDoc`` and passed to
     ``cls.add``.

     :param title: link title
     :param url: link URL (normalised before use)
     :param body: optional body text
     :param tags: optional list of tags; ``None`` means a new empty list, so
         no list object is shared between calls
     :param clicks: initial click count
     :param unread: initial unread flag
     :return: ``cls(doc, link)``
     """
     if tags is None:
         tags = []

     url = norm(url)
     link_id = mmh3.hash(url)
     key = ndb.Key(LinkModel, link_id)
     domain = urlparse(url).netloc
     if len(domain) > 4 and domain.startswith('www.'):
         domain = domain[4:]
     link = LinkModel(key=key,
                      title=title,
                      url=url,
                      domain=domain,
                      body=body,
                      tags=tags,
                      clicks=clicks,
                      unread=unread)
     link.put()
     doc_id = str(link.id)
     doc = cls._buildDoc(doc_id, title, body, domain, tags)
     cls.add(doc)
     return cls(doc, link)
Beispiel #20
0
def mapBloomFilter(clientvalue, icohort, nhashes, bfsize):
    """Return a length-``bfsize`` vector with ``nhashes`` cells set to 1.

    The hashed input is ``str(icohort) + clientvalue``. Seeds 0, 1, 2, ... are
    tried in order; each yields ``mmh3.hash(input, seed) % bfsize``, and
    indices are collected until ``nhashes`` distinct ones have been found.

    :param clientvalue: string value to map
    :param icohort: cohort identifier, prepended after ``str()`` conversion
    :param nhashes: number of distinct cells to set
    :param bfsize: length of the returned vector and modulus for every hash
    :return: ``np.zeros(bfsize)`` with the selected cells set to 1
    :raises ValueError: if ``nhashes`` exceeds ``bfsize``; that many distinct
        cells cannot exist and the search would never finish.
    """
    if nhashes > bfsize:
        raise ValueError(
            "nhashes (%r) cannot exceed bfsize (%r)" % (nhashes, bfsize)
        )

    inptomd = str(icohort) + clientvalue

    selected = set()  # set membership replaces the linear list scan
    seed = 0
    while len(selected) < nhashes:
        selected.add(mmh3.hash(inptomd, seed) % bfsize)
        seed += 1

    vec = np.zeros(bfsize)
    for ind in selected:
        vec[ind] = 1

    return vec
Beispiel #21
0
import pymmh3
import requests
import base64
import re
import time


def change_format(content):
    """Split *content* into 76-character lines, each ending with a newline.

    The string is cut at fixed positions (0, 76, 152, ...); the final line
    holds whatever remains. An empty string yields a single ``"\\n"``.

    The earlier implementation appended the whole input a second time when
    its length was an exact multiple of 76, because ``content[-0:]`` is the
    full string rather than an empty one.

    :param content: text to wrap (the callers in this file pass base64 text)
    :return: the wrapped text, always terminated by ``"\\n"``
    """
    width = 76
    lines = [content[start:start + width] for start in range(0, len(content), width)]
    return "{}\n".format("\n".join(lines))


if __name__ == "__main__":
    # Time a single fetch-and-hash of a fixed favicon URL.
    started = time.time()
    response = requests.get('https://www.baidu.com/favicon.ico')
    content_type = response.headers['Content-Type']
    if content_type == "image/x-icon":
        encoded = base64.b64encode(response.content)
        favicon_hash = pymmh3.hash(change_format(encoded.decode('utf-8')))
        print(favicon_hash)
    elapsed = time.time() - started

    # The message reads "Total time taken: {}".
    print("总用时为:{}".format(elapsed))
Beispiel #22
0
 def add(self, string, val):
     """Add *val* to one cell per seeded hash of *string*.

     *string* is converted with ``str()`` and encoded to bytes. For every
     seed in ``range(1, self.hash_count)`` the cell
     ``self.arrays[seed][mmh3.hash(encoded, seed) % self.size]`` is increased
     by *val*. Row 0 of ``self.arrays`` is never written because the seeds
     start at 1.
     """
     encoded = str(string).encode()
     for seed in range(1, self.hash_count):
         slot = mmh3.hash(encoded, seed) % self.size
         row = self.arrays[seed]
         row[slot] = val + row[slot]
Beispiel #23
0
 def add(self, string):
     """Set one cell of ``self.bit_array`` to 1 per seeded hash of *string*.

     *string* is converted with ``str()`` and encoded to bytes; the seeds are
     ``range(1, self.hash_count)`` and each cell index is
     ``mmh3.hash(encoded, seed) % self.size``.
     """
     encoded = str(string).encode()
     for seed in range(1, self.hash_count):
         self.bit_array[mmh3.hash(encoded, seed) % self.size] = 1
    def get_hash(self):
        """Hash the formatted favicon data and store the result on ``self.hash``.

        The data returned by ``self.get_favicon()`` is passed through
        ``self.chang_format`` before being given to ``pymmh3.hash``.
        """
        formatted = self.chang_format(self.get_favicon())
        self.hash = pymmh3.hash(formatted)
Beispiel #25
0
import base64
import sys
import os

import shodan
import pymmh3 as mmh3
import requests

# Fetch <argv[1]>/favicon.ico, print its hash as a Shodan search filter and,
# when the API_KEY environment variable is set, print the matching host count.
url = f'{sys.argv[1]}/favicon.ico'

try:
    content = requests.get(url).content
except Exception as e:
    # Python 3 exceptions have no `.message` attribute and the `os` module
    # has no `exit` function; print the exception itself and exit via sys.
    print(e)
    sys.exit(1)

# base64.encodebytes inserts a newline after every 76 output characters and
# at the end; the hash is computed over that wrapped form.
h = mmh3.hash(base64.encodebytes(content))
print(f'http.favicon.hash:{h}')

api_key = os.getenv('API_KEY', '')
if api_key:
    api = shodan.Shodan(api_key)
    count = api.count(f'http.favicon.hash:{h}')
    print(f'count: {count.get("total", "not available")}')
 def add(self, item):
     """Set one cell of ``self.filter`` to 1 per seeded hash of *item*.

     The seeds are ``range(self.num_hashes)`` and each cell index is
     ``pymmh3.hash(item, seed) % self.size``.
     """
     for seed in range(self.num_hashes):
         self.filter[pymmh3.hash(item, seed) % self.size] = 1
Beispiel #27
0
 def hash_func(self, key, node):
     """Return ``pymmh3.hash`` of *key* concatenated with ``str(node)``."""
     combined = key + str(node)
     return pymmh3.hash(combined)
 def is_member(self, item):
     """Return ``False`` if any cell selected for *item* equals 0, else ``True``.

     The cells are ``self.filter[pymmh3.hash(item, seed) % self.size]`` for
     every seed in ``range(self.num_hashes)``; checking stops at the first
     cell that equals 0.
     """
     return not any(
         self.filter[pymmh3.hash(item, seed) % self.size] == 0
         for seed in range(self.num_hashes)
     )
Beispiel #29
0
 def bloomHash(self,inStr, hashSeed):
     """Map ``inStr`` to an integer in {0, ..., m-1}, where m is ``self._blmM``.

     The value of ``pymmh3.hash(inStr, hashSeed)`` is divided by ``2**31 - 1``,
     shifted by one and scaled by ``m / 2``. The result is then clamped to
     the valid range: without the clamp, a hash equal to ``2**31 - 1`` gives
     exactly ``m``, one past the last cell.

     :param inStr: value to hash
     :param hashSeed: seed passed to ``pymmh3.hash``
     :return: integer cell index
     """
     # NOTE(review): assumes pymmh3.hash returns a signed 32-bit value, as
     # the original comment on this method states - confirm.
     maxHash = 2**31-1
     h = pymmh3.hash(inStr, hashSeed) / maxHash
     cells = self._blmM
     idx = int(0.5 * cells * (h + 1))
     if cells > 0:
         # Keep the boundary hashes inside {0, ..., m-1}.
         idx = min(max(idx, 0), int(cells) - 1)
     return idx
# -*- coding: utf-8 -*-