Ejemplo n.º 1
0
def update_autocomplete():
    """Regenerate the autocomplete word list from the ``imagedata`` table.

    Every distinct ``artist`` and ``tags`` value is concatenated by the
    database, split into words, de-duplicated and sorted.  The file written
    contains two lines: the artist words, then the tag words, each joined by
    single spaces.  Non-ASCII characters are dropped before writing.
    """

    from pathlib import Path
    from Webscraping import CONNECT

    MYSQL = CONNECT()

    # Release the connection even when the query itself fails.
    try:
        artist, tags = MYSQL.execute('''SELECT 
            GROUP_CONCAT(DISTINCT artist ORDER BY artist SEPARATOR ""), 
            GROUP_CONCAT(DISTINCT tags ORDER BY tags SEPARATOR "") 
            FROM imagedata''',
                                     fetch=1)[0]
    finally:
        MYSQL.close()

    # GROUP_CONCAT yields NULL (None) when there is nothing to concatenate;
    # treat that as "no words" instead of crashing on ``None.split()``.
    words = (
        ' '.join(sorted(set((artist or '').split()))),
        ' '.join(sorted(set((tags or '').split()))),
    )
    # Strip everything outside ASCII so the file is plain ASCII text.
    text = '\n'.join(words).encode('ascii', 'ignore')
    Path(r'GUI\autocomplete.txt').write_text(text.decode())
Ejemplo n.º 2
0
def Check_Predictions(sql=False, num=25):
    """Run the tagging model on a random sample of images.

    Args:
        sql: When true, sample rows from the ``imagedata`` table and compare
            the stored tags with the model's prediction.  When false, sample
            JPEG files from disk and show each one in an OpenCV window
            titled with its prediction.
        num: Number of images to sample.
    """

    from Webscraping import USER
    from MachineLearning import Model

    path = USER / r'Dropbox\ん'
    model = Model('deepdanbooru.hdf5')

    if sql:

        from Webscraping import CONNECT

        MYSQL = CONNECT()
        # ``int(num)`` guarantees only a number is interpolated into the SQL.
        SELECT = f'''
            SELECT full_path(path), tags, type 
            FROM imagedata 
            WHERE SUBSTR(path, 32, 5) IN ('.jpg', '.png')
            ORDER BY RAND() LIMIT {int(num)}
            '''

        try:
            rows = MYSQL.execute(SELECT, fetch=1)
        finally:
            MYSQL.close()

        for image, tags, type_ in rows:

            tags = sorted(tags.split())
            image = path / image
            prediction = model.predict(image)
            # NOTE(review): the overlap is computed but never reported or
            # returned; kept as in the original pending a decision on output.
            similar = set(tags) & set(prediction)

    else:

        import cv2
        import numpy as np
        from PIL import Image
        from random import choices

        candidates = list(path.glob('[0-9a-f]/[0-9a-f]/*jpg'))
        if not candidates:
            # ``choices`` raises IndexError on an empty population.
            return

        for image in choices(candidates, k=num):

            prediction = model.predict(image)

            # Force three channels so the RGB -> BGR conversion is valid for
            # grayscale / CMYK JPEGs too.  (Original called the non-existent
            # ``np.aray``.)
            pixels = np.array(Image.open(image).convert('RGB'))
            pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)
            # The window name must be a string.
            cv2.imshow(str(prediction), pixels)
            cv2.waitKey(0)
Ejemplo n.º 3
0
def Artist_statistics():
    """Print rating statistics for artists with more than 100 rows.

    For each selected artist one line is printed with the number of starred
    images, the total of their stars, and the ratio of that total to the
    maximum possible (five stars per starred image).  Artists whose numbers
    cannot be formatted (no starred images, NULL sum) are skipped.
    """

    from Webscraping import CONNECT

    MYSQL = CONNECT()

    SELECT = 'SELECT artist FROM imagedata GROUP BY artist HAVING COUNT(artist) > 100 ORDER BY artist'
    STATS = '''SELECT (
        SELECT COUNT(*) FROM imagedata 
        WHERE MATCH(tags, artist) AGAINST(%s IN BOOLEAN MODE) AND stars
        ) AS TOTAL,
        (
        SELECT SUM(stars) FROM imagedata 
        WHERE MATCH(tags, artist) AGAINST(%s IN BOOLEAN MODE)
        ) AS STARS
        '''

    try:
        # The first result row is skipped and at most 100 artists are shown.
        for artist, in MYSQL.execute(SELECT, fetch=1)[1:101]:

            # ``execute`` returns a list of rows; the statistics query yields
            # exactly one row holding both values.
            total, stars = MYSQL.execute(STATS, (artist, artist), fetch=1)[0]

            try:
                print(f'{artist.strip():<25} (Num: {total:>4}, Stars: {stars:>4}): {stars / (total*5):>4.2%}')
            except (TypeError, ValueError, ArithmeticError):
                # NULL sum (None) or zero starred images: nothing to report.
                continue
    finally:
        MYSQL.close()
Ejemplo n.º 4
0
def unnamed(path):
    """Delete files under *path* that already exist in the image library.

    A file counts as a duplicate when exactly one ``imagedata`` row matches
    its hash or its file name, and the file that row points to exists in the
    library folder (``utils.PATH / xx / yy / name``).

    Args:
        path: Directory that is searched recursively.

    Returns:
        The number of files deleted.
    """

    from Webscraping import CONNECT, utils
    from pathlib import Path

    MYSQL = CONNECT()
    SELECT = 'SELECT path FROM imagedata WHERE hash=%s OR path=%s'
    num = 0

    try:
        for file in Path(path).glob('**/*.*'):

            # The pattern also matches directories whose name contains a dot.
            if not file.is_file():
                continue

            hash_ = utils.get_hash(file)
            rows = MYSQL.execute(SELECT, (hash_, file.name), fetch=1)
            if len(rows) != 1:
                continue

            target = rows[0][0]
            if target is None:
                continue

            stored = utils.PATH / target[:2] / target[2:4] / target

            # Never delete the library copy itself when *path* overlaps the
            # library folder.
            if stored.exists() and not stored.samefile(file):

                file.unlink()
                num += 1
    finally:
        MYSQL.close()

    return num
Ejemplo n.º 5
0
def remove_redundancies():
    """De-duplicate the words stored in every row's ``artist`` and ``tags``.

    Hyphens are replaced with underscores, duplicate words are removed, and
    the result is written back padded with one leading and one trailing
    space.  Words are sorted so that re-running the function produces the
    same stored value.
    """

    from Webscraping import CONNECT

    def normalize(field):
        """Return *field* as a space-padded, sorted, duplicate-free word list."""
        if field is None:
            # Leave NULL columns untouched instead of crashing on ``.split()``.
            return None
        # Replace hyphens *before* de-duplicating so that "a-b" and "a_b"
        # collapse into a single word.
        words = sorted(set(field.replace('-', '_').split()))
        return f' {" ".join(words)} '

    MYSQL = CONNECT()
    SELECT = 'SELECT path, artist, tags FROM imagedata WHERE NOT ISNULL(path)'
    UPDATE = 'UPDATE imagedata SET artist=%s, tags=%s WHERE path=%s'

    try:
        for path, artist, tags in MYSQL.execute(SELECT, fetch=1):
            MYSQL.execute(UPDATE, (normalize(artist), normalize(tags), path))

        MYSQL.commit()
    finally:
        MYSQL.close()
Ejemplo n.º 6
0
def Find_symmetric_videos():
    """Print the path of every silent animation that plays as a palindrome.

    A clip is considered symmetric when its frames read the same forwards
    and backwards, allowing for one extra frame at either end.
    """

    from cv2 import VideoCapture
    from Webscraping import CONNECT

    def compare(seq):
        """Return True when *seq* is non-empty and mirrors around its centre."""
        half = len(seq) // 2
        matches = [
            (left == right).all()
            for left, right in zip(seq[:half], reversed(seq))
            ]
        # The original returned the list itself, which is truthy whenever it
        # is non-empty, so every clip was reported.  Require every mirrored
        # pair to match, and keep "nothing to compare" as not symmetric.
        return bool(matches) and all(matches)

    def symmetric(seq):
        """Test *seq* and its two one-frame-shorter variants for symmetry."""
        if len(seq) % 2 != 0:
            # Drop the middle frame on a copy rather than mutating the
            # caller's list.
            middle = len(seq) // 2
            seq = seq[:middle] + seq[middle + 1:]

        return any(
            compare(candidate)
            for candidate in (seq[1:], seq, seq[:-1])
            )

    MYSQL = CONNECT()
    SELECT = 'SELECT path FROM imagedata WHERE MATCH(tags, artist) AGAINST("animated -audio" IN BOOLEAN MODE) AND type=0'

    try:
        rows = MYSQL.execute(SELECT, fetch=1)
    finally:
        MYSQL.close()

    for path, in rows:

        frames = []
        vidcap = VideoCapture(path)
        try:
            success, frame = vidcap.read()

            while success:

                frames.append(frame)
                success, frame = vidcap.read()
        finally:
            # Free the decoder / file handle before moving to the next clip.
            vidcap.release()

        if symmetric(frames): print(path)
Ejemplo n.º 7
0
def Remove_Intro(path):
    """Trim the low-contrast lead-in from a video and update its stored hash.

    Frames are read until one has an average local contrast above 5.  The
    video is then re-encoded with ffmpeg starting at the computed offset,
    the new hash is stored in ``imagedata`` and the original file is
    overwritten with the trimmed version.

    Args:
        path: Path of the video file to trim in place.

    Raises:
        ValueError: If no frame can be read from *path*.
    """

    import ffmpeg, cv2, tempfile
    import numpy as np
    from pathlib import Path
    from Webscraping import CONNECT
    from Webscraping.utils import get_hash

    def contrast(img, k_size=25):
        """Return the mean local contrast of *img* (BGR) as a percentage."""

        # Work on the lightness channel of the LAB colour space.
        lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
        lightness, _, _ = cv2.split(lab)

        # Local minimum / maximum over a k_size x k_size neighbourhood.
        kernel = np.ones((k_size, k_size), np.uint8)
        local_min = cv2.erode(lightness, kernel, iterations=1).astype(np.float64)
        local_max = cv2.dilate(lightness, kernel, iterations=1).astype(np.float64)

        # The +1 keeps the denominator non-zero for all-black regions.
        local_contrast = (local_max - local_min) / (local_max + (local_min + 1))

        return 100 * np.mean(local_contrast)

    np.seterr(divide='ignore', invalid='ignore')
    UPDATE = 'UPDATE imagedata SET hash=%s WHERE path=%s'

    path = Path(path)
    vidcap = cv2.VideoCapture(str(path))

    try:
        success, frame = vidcap.read()
        if not success:
            # Previously this fell through to an unbound ``msec``.
            raise ValueError(f'no frames could be read from {path}')

        while success:

            msec = vidcap.get(cv2.CAP_PROP_POS_MSEC) / 100
            if contrast(frame) > 5: break
            success, frame = vidcap.read()
    finally:
        # The file is overwritten below; release the handle first.
        vidcap.release()

    # NOTE(review): the position is divided by 100 above and by 1000 here,
    # so ``sec`` is 1/100 of the frame's timestamp in seconds.  Kept as in
    # the original -- confirm whether this scaling is intentional.
    sec = msec / 1000
    temp = Path(tempfile.gettempdir(), path.name)
    ffmpeg.input(str(path)) \
        .trim(start=sec) \
        .setpts('PTS-STARTPTS') \
        .output(str(temp)) \
        .run()

    SQL = CONNECT()
    try:
        SQL.execute(UPDATE, (get_hash(temp), path.name))
        path.write_bytes(temp.read_bytes())
        # Commit only once the file on disk matches the new hash.
        SQL.commit()
    finally:
        SQL.close()
Ejemplo n.º 8
0
def Normalize_Database():
    """Reconcile the ``imagedata`` table with the image files on disk.

    Reports how many database paths have no file and how many files have no
    database row, then asks for confirmation.  After confirmation:

    * rows without a file keep their record (with ``path`` set to NULL) when
      they still have a ``src`` or ``href``, and are deleted otherwise;
    * files without a row are trashed when they are "name (n).ext" copies of
      an existing file, and otherwise moved to the reserve folder.
    """

    import re, send2trash
    from pathlib import Path
    from Webscraping import CONNECT, USER, utils
    
    MYSQL = CONNECT()
    DROPBOX = USER / r'Dropbox\ん'
    RESERVE = r'Downloads\Reserve'
    parts = ", ".join([f"'{part}'" for part in DROPBOX.parts]).replace('\\', '')
    SELECT = f'SELECT full_path(path, {parts}) FROM imagedata WHERE NOT ISNULL(path)'
    select = 'SELECT src, href FROM imagedata WHERE path=%s'
    UPDATE = 'UPDATE imagedata SET path=NULL WHERE path=%s'
    DELETE = 'DELETE FROM imagedata WHERE path=%s'

    # Raw strings: the original literals contained invalid escape sequences.
    COPY_NAME = re.compile(r'.+ \(.+\)\..+')
    COPY_SUFFIX = re.compile(r' \(.+\)')

    database = {
        Path(path) for path, in MYSQL.execute(SELECT, fetch=1)
        }
    windows = set(DROPBOX.glob('[0-9a-f][0-9a-f]/[0-9a-f][0-9a-f]/*.*'))
    x, y = database - windows, windows - database
        
    print(f'{len(x)} not in files')
    print(f'{len(y)} not in database')
    
    if input('Go through with deletes? ').lower() not in ('y', 'ye', 'yes'):
        
        return
    
    for file in x:
        rows = MYSQL.execute(select, (file.name,), fetch=1)
        # Keep the record when any matching row still has a src or href.
        # (``any(*rows)`` raised TypeError for zero or several rows.)
        if any(any(row) for row in rows):
            MYSQL.execute(UPDATE, (file.name,))
        else:
            MYSQL.execute(DELETE, (file.name,))

    print(f'{len(x)} records deleted')
    MYSQL.commit()

    SELECT = 'SELECT path FROM imagedata WHERE hash=%s OR path=%s'

    for file in y:

        # "name (1).ext" next to an existing "name.ext" is a stray copy.
        if COPY_NAME.match(file.name):
            clean = COPY_SUFFIX.sub('', file.name)
            if file.with_name(clean).exists():
                send2trash.send2trash(str(file))
                continue

        hash_ = utils.get_hash(file)
        name = utils.get_name(file)

        image = MYSQL.execute(SELECT, (hash_, name.name), fetch=1)
        if image:
            try: 
                # NOTE(review): ``file`` comes from the glob above, so this
                # test is normally true and the rename branch is never
                # taken; possibly ``name.exists()`` was intended.  Kept as
                # in the original -- confirm before changing.
                if not file.exists(): file.rename(name)
                else: file.replace(USER / RESERVE / file.name)
            except FileExistsError: send2trash.send2trash(str(file))
        else:
            try: file.replace(USER / RESERVE / file.name)
            except OSError: continue

    print(f'{len(y)} files moved')
Ejemplo n.º 9
0
def siamese():
    """Show a montage of positive / negative image pairs sampled from the database.

    One hundred random JPEG rows of type 1 or 2 are loaded, split into train
    and test sets, and turned into (image, image) pairs labelled 1 when both
    images share a type and 0 otherwise.  Forty-nine training pairs are then
    drawn and displayed in a 7x7 OpenCV montage.
    """

    import numpy as np
    import cv2
    from Webscraping import CONNECT, USER
    from imutils import build_montages
    from PIL import Image

    MYSQL = CONNECT()
    SELECT = 'SELECT type+0, path FROM imagedata WHERE type in (1, 2) AND path LIKE "%.jpg" ORDER BY RAND() LIMIT 100'
    # The montage code below is laid out for 28x28 single-channel images,
    # so every image is normalised to that geometry when it is loaded.
    SIZE = (28, 28)

    def make_pairs(images, labels, pairs=1):
        """Build one positive and one negative pair for every image."""
        pairImages = []
        pairLabels = []

        labels = np.asarray(labels)
        # Index the examples by their actual label value.  The original
        # indexed a list built from ``range(numClasses)``, which raised
        # IndexError for labels that do not start at zero.
        idx = {label: np.where(labels == label)[0]
               for label in np.unique(labels)}

        for currentImage, label in zip(images, labels):
            # positive pair: a random image with the same label
            posImage = images[np.random.choice(idx[label])]
            pairImages.append([currentImage, posImage])
            pairLabels.append([1])

            # negative pair: a random image with a different label
            negIdx = np.where(labels != label)[0]
            if len(negIdx) == 0:
                # Only one class present: no negative example to draw.
                continue
            negImage = images[np.random.choice(negIdx)]
            pairImages.append([currentImage, negImage])
            pairLabels.append([0])

        return (np.array(pairImages), np.array(pairLabels))

    def load_data(split=7):
        """Return ``(train_images, train_labels), (test_images, test_labels)``."""
        rows = MYSQL.execute(SELECT, fetch=1)
        # Labels and pixels are kept in separate arrays; packing
        # ``(int, ndarray)`` tuples into a single array is ragged.
        labels = np.array([int(row[0]) for row in rows])
        pixels = np.array([
            np.array(Image.open(row[1]).convert('L').resize(SIZE))
            for row in rows
            ])
        length = len(rows) // split

        return ((pixels[length:], labels[length:]),
                (pixels[:length], labels[:length]))

    print("[INFO] loading dataset...")
    try:
        (trainX, trainY), (testX, testY) = load_data()
    finally:
        MYSQL.close()
    print("[INFO] preparing positive and negative pairs...")
    (pairTrain, labelTrain) = make_pairs(trainX, trainY)
    (pairTest, labelTest) = make_pairs(testX, testY)

    if len(pairTrain) == 0:
        # ``np.random.choice`` cannot sample from an empty range.
        return

    # images that will be used when building the montage
    images = []

    # loop over a sample of the training pairs
    for i in np.random.choice(np.arange(0, len(pairTrain)), size=(49, )):
        imageA = pairTrain[i][0]
        imageB = pairTrain[i][1]
        label = labelTrain[i]
        # pad the pair with four pixels along the top, bottom and right
        # borders so neighbouring pairs are visually separated
        output = np.zeros((36, 60), dtype="uint8")
        pair = np.hstack([imageA, imageB])
        output[4:32, 0:56] = pair
        # green text for a positive pair, red for a negative pair
        text = "neg" if label[0] == 0 else "pos"
        color = (0, 0, 255) if label[0] == 0 else (0, 255, 0)
        # expand to three channels, enlarge to 96x51 for visibility and
        # draw the pair type on the image
        vis = cv2.merge([output] * 3)
        vis = cv2.resize(vis, (96, 51), interpolation=cv2.INTER_LINEAR)
        cv2.putText(vis, text, (2, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.75, color,
                    2)
        images.append(vis)

    # construct and show the montage
    montage = build_montages(images, (96, 51), (7, 7))[0]
    cv2.imshow("Siamese Image Pairs", montage)
    cv2.waitKey(0)