Esempio n. 1
0
def to_data(filename):
    """Join the feature frames, encode every row in parallel, write a TFRecord file.

    ``df``, ``user_df`` and ``item_df`` are resolved outside this function.
    They are left-joined on ``u_st_2_uid`` and then ``d_st_2_did``; each
    resulting row (as a dict) is handed to ``to_p`` on a 10-process pool,
    and every result is written, in row order, to ``filename``.

    Args:
        filename: path of the TFRecord file to create.

    Raises:
        Whatever a worker raised, re-raised by ``AsyncResult.get()``; the
        output file is closed before the exception propagates.
    """
    h = Hasher(2 ** 20)
    df_all = pd.merge(pd.merge(df, user_df, on='u_st_2_uid', how='left'),
                      item_df, on='d_st_2_did', how='left')
    print(df_all.dtypes)
    print('*' * 18)
    print(df_all.count())
    print('*' * 18)
    df_json = df_all.to_dict('records')

    # multiprocessing
    p = Pool(10)
    try:
        results = [
            p.apply_async(to_p, args=(i, feature_value_dict, h))
            for i, feature_value_dict in enumerate(df_json)
        ]
        print('*' * 18)
        print(len(results))
        print('*' * 18)
        p.close()
        p.join()
    finally:
        # Harmless after a clean close()/join(); on an error it stops the
        # workers instead of leaving them running.
        p.terminate()

    writer = tf.python_io.TFRecordWriter(filename)
    try:
        for r in results:
            # presumably to_p returns a serialized record (bytes) -- TODO confirm
            writer.write(r.get())
    finally:
        # r.get() re-raises worker exceptions; always release the file handle.
        writer.close()
Esempio n. 2
0
    def __init__(self, init_dic):
        """Validate ``init_dic`` and set up the crawler's initial state.

        When validation fails, an error is logged and the process exits
        with status 1.
        """
        self.logger = getLogger()

        valid = self.__isValidInfo(init_dic)
        if not valid:
            message = "Failed to init RequestURLCrawler : Invalid input information"
            self.logger.error(message)
            exit(1)

        self.info_dic = init_dic
        self.cursor = None

        # Unvisited seeds (minimum heap ordered by page no.), filled with
        # heappush(req_url_queue, (guid_hash, url_data)).
        self.req_url_queue = []

        # Visited + fully parsed data: dic[view_guid_hash] = URLData()
        self.url_data_dic = {}

        self.hasher = Hasher()

        # Left unset here; nothing in this constructor assigns them.
        self.url_factory = None
        self.html_parser = None

        self.xml_producer = XMLPrinter(OUTPUT_PATH)
Esempio n. 3
0
from hasher import Hasher

# Build a Hasher from an empty-string argument (its meaning is defined by the
# hasher module, not shown here).
hasher = Hasher('')

# Print whatever hash() returns for a sample sentence.
print(hasher.hash('my name is jack'))
Esempio n. 4
0
def main():
    """Fetch photos from the selected libraries, persist them and their hashes.

    Steps:
      1. Parse arguments; when no library path was given, offer the last
         opened Photos library and exit with status 2 if it is declined.
      2. Open (or create) the database session and load the libraries.
      3. Unless ``--dry-run``: persist the photos, encode them with both
         imagededup and imagehash, and persist one ``Encoding`` row per
         (photo, library, algorithm).
    """
    args = parse_args()

    # Copy into a fresh list: ``args.paths`` may be None (the append below
    # would crash on it) and the parsed namespace should not be mutated.
    library_paths = list(args.paths or [])
    if not library_paths:
        logging.error('no libraries specified')
        last_library_path = osxphotos.utils.get_last_library_path()

        # Empty answer means the default (yes); surrounding whitespace is ignored.
        resp = input(f"use last .photoslibrary ({last_library_path}) [Y/n] ").strip()
        if not resp or resp.lower() == 'y':
            library_paths.append(last_library_path)
        else:
            exit(2)

    db_session = fetch_or_initialize_db(args.db_path)

    applephotos, directories = fetch_libraries(library_paths, db_session)
    # TODO: only the first Apple Photos library is processed.
    photos, videos, albums = fetch_photos(applephotos[0])

    # TODO replace these dry-run guards with decorators
    if args.dry_run:
        logging.info('[dry-run] skipping photo persistence')
    else:
        logging.info('Persisting photo data')
        persist_photos(photos, db_session)

    hasher = Hasher()

    if args.dry_run:
        logging.info('[dry-run] skipping image encoding')
    else:
        logging.info("Encoding images with imagededup")
        imagededup_encodings = hasher.imagededup_encode(photos)

        logging.info("Encoding images with imagehash")
        imagehash_encodings = hasher.imagehash_encode(photos)

        logging.info('Persisting photo encodings')
        # Per photo: imagededup hashes first, then imagehash hashes.
        encodings_by_library = (
            (HashLibrary.imagededup, imagededup_encodings),
            (HashLibrary.imagehash, imagehash_encodings),
        )
        encodings = []

        for photo in photos:
            photo_id = photo.id
            for hash_library, by_photo in encodings_by_library:
                for hash_name, value in by_photo[photo_id].items():
                    encodings.append(Encoding(
                        photo_id=photo_id,
                        hash_library=hash_library,
                        algorithm=get_hash_algo(hash_name),
                        value=value,
                    ))

        db_session.add_all(encodings)
        db_session.commit()

    # The deduplication step itself is not implemented yet; only the
    # dry-run notice exists.
    if args.dry_run:
        logging.info('[dry-run] skipping deduplication check and persistence')
Esempio n. 5
0
class SQL:
    """Data-access layer for the ``library`` MySQL database.

    Conventions shared by the public methods:

    * read methods return a list of row dicts (column name -> value); when
      nothing matches, or when reading the result set fails, they print a
      message and return ``None``;
    * write methods return ``True`` after a successful commit and ``False``
      on any failure;
    * every caller-supplied value is sent to the driver as a bound
      parameter and is never spliced into the SQL text.

    NOTE(review): the ``%s`` placeholders assume the MySQL extension's
    cursor uses the DB-API "format" paramstyle (as PyMySQL / MySQLdb do)
    -- confirm against the extension actually installed.
    """

    app = Flask(__name__)
    mysql = MySQL()
    hasher = Hasher()

    # Sections accepted by updateUser, mapped to the column each one updates.
    _USER_COLUMNS = {
        'email': '`email`',
        'user': '`user`',
        'pass': '`password`',
        'password': '`password`',
    }

    # searchBooks: search column -> (query, number of pattern placeholders).
    # ``IN`` is used for the sub-selects because several genres / authors /
    # collections may match the pattern.
    _SEARCH_QUERIES = {
        'name': (
            "SELECT * FROM `books` WHERE `bname` LIKE %s", 1),
        'genre': (
            "SELECT * FROM `books` WHERE `idgenre` IN "
            "(SELECT `id` FROM `genres` WHERE `genre` LIKE %s)", 1),
        'author': (
            "SELECT * FROM `books` WHERE `idauthor` IN "
            "(SELECT `id` FROM `author` "
            "WHERE `first` LIKE %s OR `last` LIKE %s)", 2),
        'collection': (
            "SELECT * FROM `books` WHERE `idcollect` IN "
            "(SELECT `id` FROM `collections` "
            "WHERE `namecollection` LIKE %s)", 1),
    }

    def __init__(self):
        # NOTE(review): connection settings are hard-coded (empty password);
        # consider loading them from configuration.
        self.app.config['MYSQL_DATABASE_USER'] = '******'
        self.app.config['MYSQL_DATABASE_PASSWORD'] = ''
        self.app.config['MYSQL_DATABASE_DB'] = 'library'
        self.app.config['MYSQL_DATABASE_HOST'] = 'localhost'

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _rowsAsDicts(cur):
        """Return every remaining row of ``cur`` as a dict keyed by column name."""
        rows = cur.fetchall()
        if not rows:
            return []
        columns = [col[0] for col in cur.description]
        return [dict(zip(columns, row)) for row in rows]

    def _fetchRows(self, query, params, errorMessage):
        """Run a SELECT and return its rows as dicts (possibly an empty list).

        Connection and execution errors propagate to the caller; a failure
        while reading the result set prints ``errorMessage`` and returns
        ``None``. The connection is always closed.
        """
        self.mysql.init_app(self.app)
        con = self.mysql.connect()
        try:
            cur = con.cursor()
            # params=None (not an empty tuple) skips driver-side formatting.
            cur.execute(query, params)
            try:
                return self._rowsAsDicts(cur)
            except Exception:
                print(errorMessage)
                return None
        finally:
            con.close()

    def _fetchNonEmpty(self, query, params, emptyMessage, errorMessage):
        """Like ``_fetchRows`` but prints ``emptyMessage`` and returns None for no rows."""
        rows = self._fetchRows(query, params, errorMessage)
        if rows is None:
            return None
        if not rows:
            print(emptyMessage)
            return None
        return rows

    def _execute(self, query, params=None):
        """Run a write statement and commit it; True on success, False on failure."""
        self.mysql.init_app(self.app)
        try:
            con = self.mysql.connect()
            try:
                cur = con.cursor()
                cur.execute(query, params)
                con.commit()
            finally:
                con.close()
            return True
        except Exception:
            return False

    def _findUser(self, email, password, errorMessage):
        """Return the ``users`` row matching the credentials.

        Returns the row dict on a match, ``False`` when no row matches, and
        ``None`` when the table is empty or an error occurred (a message is
        printed in both of those cases).

        NOTE(review): the password is compared as stored, so this assumes
        callers pass it in the same form it was registered with -- confirm
        it is hashed before it reaches this class.
        """
        rows = self._fetchRows("SELECT * FROM `users`", None, errorMessage)
        if rows is None:
            return None
        try:
            if not rows:
                print('No username/password')
                return None
            for row in rows:
                # ``unicode`` is the Python 2 builtin this module relies on.
                emailUser = unicode(row['email'])
                passwordUser = unicode(row['password'])
                if (self.hasher.compareStrings(email, emailUser)
                        and password == passwordUser):
                    return row
            return False
        except Exception:
            print(errorMessage)
            return None

    # ------------------------------------------------------------------
    # Users
    # ------------------------------------------------------------------

    def checkUser(self, email, password):
        """Return True when a user with these credentials exists, else False.

        Returns None (after printing) when the table is empty or on error.
        """
        match = self._findUser(email, password, 'Error CheckUser')
        if match is None or match is False:
            return match
        return True

    def getUser(self, email, password):
        """Return ``{'iduser', 'user', 'guser'}`` for the matching user.

        Returns False when no user matches and None (after printing) when
        the table is empty or on error.
        """
        match = self._findUser(email, password, 'Error getUser')
        if match is None or match is False:
            return match
        try:
            return {
                'iduser': match['id'],
                'user': unicode(match['user']),
                'guser': match['guser'],
            }
        except Exception:
            print('Error getUser')
            return None

    def registerUser(self, email, password, username, gUser=0):
        """Insert a new user row; True on success, False on failure."""
        query = '''
            INSERT INTO `users`(`id`, `user`, `password`, `email`, `guser`)
                VALUES (NULL, %s, %s, %s, %s)'''
        return self._execute(query, (username, password, email, gUser))

    def updateUser(self, data, section, idUser):
        """Set one column of a user row to ``data``.

        ``section`` selects the column: ``'email'``, ``'user'`` or
        ``'pass'`` (``'password'`` is accepted as well). Any other section
        is rejected and False is returned, so the column name can never
        come from caller input.
        """
        column = self._USER_COLUMNS.get(section)
        if column is None:
            return False
        query = "UPDATE `users` SET " + column + " = %s WHERE id = %s"
        return self._execute(query, (data, idUser))

    # ------------------------------------------------------------------
    # Books
    # ------------------------------------------------------------------

    def getHomeBook(self, idUser):
        """Return the book the user read most recently (a one-row list)."""
        query = '''
            SELECT `books`.`id`, `books`.`photo`,
                   `books`.`bfile`, `books`.`bname`
                FROM `books`
                    INNER JOIN `readings` ON `books`.`id` = `readings`.`idbook`
                WHERE `readings`.`iduser` = %s
                ORDER BY `readings`.`lastreading` DESC
                LIMIT 1'''
        return self._fetchNonEmpty(query, (idUser,),
                                   'No books', 'Error getHomeBooks')

    def getAllBooks(self):
        """Return id, photo and name of every book, newest id first."""
        query = '''SELECT `id`, `photo`, `bname` FROM `books`
                        ORDER BY `id` DESC'''
        return self._fetchNonEmpty(query, None,
                                   'No books', 'Error getAllBooks')

    def getReadingsBooks(self, iduser):
        """Return the books the user has readings for, most recent first."""
        query = '''
            SELECT `books`.`id`, `books`.`photo`, `books`.`bname` FROM `books`
                INNER JOIN `readings` ON `books`.`id` = `readings`.`idbook`
                WHERE `readings`.`iduser` LIKE %s
                ORDER BY `readings`.`lastreading` DESC'''
        return self._fetchNonEmpty(query, (iduser,),
                                   'No books', 'Error getReadingsBooks')

    def getReadLaterBooks(self, iduser):
        """Return the books on the user's read-later list."""
        query = '''
            SELECT `books`.`id`, `books`.`photo`, `books`.`bname` FROM `books`
                INNER JOIN `read_later` ON `books`.`id` = `read_later`.`idbook`
                WHERE `read_later`.`iduser` LIKE %s'''
        return self._fetchNonEmpty(query, (iduser,),
                                   'No books', 'Error getReadLaterBooks')

    def getBook(self, idBook):
        """Return the book with this id, joined with its genre name."""
        query = '''
            SELECT `books`.`id`, `books`.`photo`, `books`.`bname`,
                   `books`.`synopsis`, `genres`.`genre`, `books`.`idauthor`
                FROM `books`
                    INNER JOIN `genres` ON `books`.`idgenre` = `genres`.`id`
                WHERE `books`.`id` = %s'''
        return self._fetchNonEmpty(query, (idBook,),
                                   'No book', 'Error getBook')

    def getAuthor(self, idAuthor):
        """Return the author row with this id."""
        query = "SELECT * FROM `author` WHERE `id` = %s"
        return self._fetchNonEmpty(query, (idAuthor,),
                                   'No author', 'Error getAuthor')

    def getSimilarBooksByBook(self, idBook):
        """Return up to 6 random books sharing genre, author or collection.

        The reference book itself is excluded from the result.
        """
        query = '''
            SELECT t.`id`, t.`photo`, t.`bname`, t.`synopsis`,
                   `genres`.`genre`, t.`idauthor` FROM
                ((SELECT * FROM `books` AS book WHERE `idgenre` LIKE
                    (SELECT `idgenre` FROM `books` WHERE `id` = %s)) UNION
                 (SELECT * FROM `books` AS book WHERE `idauthor` LIKE
                    (SELECT `idauthor` FROM `books` WHERE `id` = %s)) UNION
                 (SELECT * FROM `books` AS book WHERE `idcollect` LIKE
                    (SELECT `idcollect` FROM `books` WHERE `id` = %s))) AS t
                INNER JOIN `genres` ON t.`idgenre` = `genres`.`id`
                WHERE t.`idgenre` = `genres`.`id`
                    AND t.`id` NOT LIKE %s
                ORDER BY RAND()
                LIMIT 6
        '''
        return self._fetchNonEmpty(query, (idBook,) * 4,
                                   'No Books', 'Error getSimilarBooksByBook')

    def getBooksByAuthor(self, idAuthor):
        """Return up to 6 random books written by this author."""
        query = '''
            SELECT * FROM `books` WHERE `idauthor` = %s
                ORDER BY RAND()
                LIMIT 6'''
        return self._fetchNonEmpty(query, (idAuthor,),
                                   'No Books', 'Error getBooksByAuthor')

    # ------------------------------------------------------------------
    # Search
    # ------------------------------------------------------------------

    def searchBooks(self, column, words):
        """Return the books whose ``column`` contains ``words``.

        ``column`` is one of ``"name"``, ``"genre"``, ``"author"`` or
        ``"collection"``.

        Raises:
            ValueError: for any other ``column``.
        """
        try:
            query, placeholders = self._SEARCH_QUERIES[column]
        except KeyError:
            raise ValueError("unknown search column: %r" % (column,))
        pattern = "%" + words + "%"
        return self._fetchNonEmpty(query, (pattern,) * placeholders,
                                   'No Books', 'Error searchBooks')

    def searchReadLater(self, words, idUser):
        """Return the user's read-later books whose name contains ``words``."""
        query = '''
            SELECT * FROM `books`
                INNER JOIN `read_later` ON `books`.`id` = `read_later`.`idbook`
                WHERE `read_later`.`iduser` = %s
                    AND `books`.`bname` LIKE %s'''
        return self._fetchNonEmpty(query, (idUser, "%" + words + "%"),
                                   'No Books', 'Error searchReadLater')

    def searchPendings(self, words, idUser):
        """Return the user's in-progress books whose name contains ``words``."""
        query = '''
            SELECT * FROM `books`
                INNER JOIN `readings` ON `books`.`id` = `readings`.`idbook`
                WHERE `readings`.`iduser` = %s
                    AND `books`.`bname` LIKE %s'''
        return self._fetchNonEmpty(query, (idUser, "%" + words + "%"),
                                   'No Books', 'Error searchPendings')

    # ------------------------------------------------------------------
    # Read-later list
    # ------------------------------------------------------------------

    def checkReadLater(self, idUser, idBook):
        """Return True when the book is on the user's read-later list.

        Returns None (after printing) when reading the result fails.
        """
        query = '''
            SELECT * FROM `read_later` WHERE
                `iduser` LIKE %s AND
                `idbook` LIKE %s'''
        rows = self._fetchRows(query, (idUser, idBook), 'Error checkReadLater')
        if rows is None:
            return None
        return len(rows) > 0

    def addReadLater(self, idUser, idBook):
        """Add the book to the user's read-later list; True on success."""
        query = '''
            INSERT INTO `read_later`(`id`, `iduser`, `idbook`)
                VALUES (NULL, %s, %s)'''
        return self._execute(query, (idUser, idBook))

    def removeReadLater(self, idUser, idBook):
        """Remove the book from the user's read-later list; True on success."""
        query = '''
            DELETE FROM `read_later` WHERE
                `iduser` LIKE %s AND
                `idbook` LIKE %s'''
        return self._execute(query, (idUser, idBook))

    # ------------------------------------------------------------------
    # Reading progress
    # ------------------------------------------------------------------

    def getBfile(self, idbook):
        """Return the ``bfile`` column of the book with this id."""
        query = "SELECT `bfile` FROM `books` WHERE `id` = %s"
        return self._fetchNonEmpty(query, (idbook,),
                                   'No Books', 'Error getBfile')

    def getAlines(self, idbook, iduser):
        """Return the user's ``alines`` value for a book.

        When no reading row exists yet, one is created through
        ``insertAlines`` (which calls back into this method). Otherwise the
        row's ``lastreading`` timestamp is refreshed before returning.
        """
        query = '''
            SELECT `alines` FROM `readings`
                WHERE `iduser` = %s
                AND `idbook` = %s'''
        rows = self._fetchRows(query, (iduser, idbook), 'Error getAlines')
        if rows is None:
            return None
        try:
            if not rows:
                return self.insertAlines(idbook, iduser)
            self.updateDateLastReading(idbook, iduser)
            return rows
        except Exception:
            print('Error getAlines')
            return None

    def insertAlines(self, idbook, iduser):
        """Create a reading row starting at 0 and return ``getAlines`` for it.

        Returns False when the insert or the follow-up read fails.
        """
        query = '''
            INSERT INTO `readings`(`id`, `iduser`, `idbook`, `alines`, `lastreading`)
                VALUES (NULL, %s, %s, 0, NOW())'''
        if not self._execute(query, (iduser, idbook)):
            return False
        try:
            return self.getAlines(idbook, iduser)
        except Exception:
            return False

    def updateDateLastReading(self, idbook, iduser):
        """Set ``lastreading`` of the user's reading row to NOW()."""
        query = '''
            UPDATE `readings` SET `lastreading` = NOW()
                WHERE
                `iduser` LIKE %s AND
                `idbook` LIKE %s'''
        return self._execute(query, (iduser, idbook))

    def updateAlines(self, idbook, iduser, alines):
        """Store a new ``alines`` value for the user's reading row."""
        query = '''
            UPDATE `readings` SET `alines` = %s
                WHERE
                `iduser` LIKE %s AND
                `idbook` LIKE %s'''
        return self._execute(query, (alines, iduser, idbook))
Esempio n. 6
0
 def __init__(self, size, keys):
     """Record ``size`` and ``keys`` and create a ``Bitset(size)`` and a ``Hasher()``."""
     bits = Bitset(size)
     self._value = bits
     self._size, self._keys = size, keys
     self._hasher = Hasher()
def __init_hasher__():
    """Build a Hasher from the two module-level start values and register it."""
    # The start values are only read here, so no ``global`` statement is needed.
    add_hasher(Hasher(__HASHER_START_X__, __HASHER_START_Y__))
Esempio n. 8
0
    queue_hashed = queue.Queue()
    queue_ext_path = queue.Queue()
    queue_csv = queue.Queue()
    queue_csved = queue.Queue()
    queue_blk = queue.Queue()
    queue_mem = queue.Queue()
    queue_memed = queue.Queue()
    queue_rslt = queue.Queue()
    queue_elastic = queue.Queue()

    see = Seeker(queue_dis, IN_DIR, BASE_NAME, CHECK_TIME)
    dis = Dispatcher(queue_dis, queue_extrac, queue_extraced, queue_ext_path,
                     queue_av, queue_hash, queue_hashed, queue_csv,
                     queue_csved, queue_blk, queue_mem, queue_memed,
                     queue_elastic, IN_DIR, WORK_DIR, OUT_DIR, DIR_OUT)
    has = Hasher(queue_hash, queue_hashed, IN_DIR, WORK_DIR, BLOCK_SIZE_HASH)
    ext = Extractor(queue_extrac, queue_extraced, queue_ext_path, IN_DIR,
                    WORK_DIR)
    csv = Csver(queue_csv, queue_csved, WORK_DIR, OUT_DIR)
    blk = Bulker(queue_blk, queue_extraced, WORK_DIR, OUT_DIR)
    mem = Memer(queue_mem, queue_extraced, IN_DIR, WORK_DIR, OUT_DIR)
    #tim = Timeliner(queue_extrac,WORK_DIR,OUT_DIR)
    avc = Avcheck(queue_av, WORK_DIR, OUT_DIR)
    #elas = Elasticer(queue_elastic,WORK_DIR,OUT_DIR)

    see.start()
    dis.start()
    has.start()
    ext.start()
    csv.start()
    #blk.start()
Esempio n. 9
0
 def setUp(self):
     """Store a Hasher built from ``words.txt`` (nwords=3, "-" delimiter) on ``self.hasher``."""
     fixture = Hasher(
         "words.txt",
         nwords=3,
         delimeter="-",
     )
     self.hasher = fixture
Esempio n. 10
0
def main():
    """Process ``args.input`` with a Hasher built from ``args.wordfile`` and pretty-print it.

    ``args`` is resolved outside this function; the Hasher is created with
    ``nwords=3`` and ``"-"`` as delimiter.
    """
    word_hasher = Hasher(
        args.wordfile,
        nwords=3,
        delimeter="-",
    )
    processed = word_hasher.process(args.input)
    pprint(processed)