def update_autocomplete():
    """Rewrite the GUI autocomplete file from the ``imagedata`` table.

    Collects every distinct whitespace-separated word from the ``artist``
    and ``tags`` columns and writes two lines to ``GUI/autocomplete.txt``:
    the sorted artist words, then the sorted tag words. Non-ASCII
    characters are dropped from the output.
    """
    from pathlib import Path
    from Webscraping import CONNECT

    # NOTE(review): MySQL truncates GROUP_CONCAT output at the session's
    # group_concat_max_len; this presumably relies on the limit having been
    # raised elsewhere -- confirm.
    SELECT = (
        'SELECT GROUP_CONCAT(DISTINCT artist ORDER BY artist SEPARATOR ""), '
        'GROUP_CONCAT(DISTINCT tags ORDER BY tags SEPARATOR "") '
        'FROM imagedata'
    )

    MYSQL = CONNECT()
    try:
        artist, tags = MYSQL.execute(SELECT, fetch=1)[0]
    finally:
        # Release the connection even when the query fails.
        MYSQL.close()

    # GROUP_CONCAT yields NULL (None) when there are no rows to aggregate.
    lines = [
        ' '.join(sorted(set((column or '').split())))
        for column in (artist, tags)
    ]

    # Strip anything outside ASCII before writing, as the file is consumed
    # as plain ASCII text.
    text = '\n'.join(lines).encode('ascii', 'ignore').decode('ascii')
    Path(r'GUI\autocomplete.txt').write_text(text, encoding='ascii')
def Check_Predictions(sql=False, num=25):
    """Run the tagging model on a random sample of stored images.

    Args:
        sql: When true, sample rows from ``imagedata`` and intersect the
            stored tags with the model's prediction. When false, sample
            ``.jpg`` files from disk and display each one in an OpenCV
            window titled with its prediction (press a key to advance).
        num: Number of images to sample.
    """
    from Webscraping import USER
    from MachineLearning import Model

    path = USER / r'Dropbox\ん'
    model = Model('deepdanbooru.hdf5')

    if sql:
        from Webscraping import CONNECT

        # NOTE(review): the SUBSTR offsets assume a fixed-length file name;
        # confirm they line up with the extension in the stored paths.
        SELECT = (
            " SELECT full_path(path), tags, type FROM imagedata "
            "WHERE SUBSTR(path, 32, 5) IN ('.jpg', '.png') "
            f"ORDER BY RAND() LIMIT {int(num)} "  # int() keeps the SQL well-formed
        )

        MYSQL = CONNECT()
        try:
            for image, tags, type_ in MYSQL.execute(SELECT, fetch=1):
                tags = sorted(tags.split())
                image = path / image
                prediction = model.predict(image)
                # NOTE(review): the overlap is computed but never reported;
                # kept as-is so it can still be inspected under a debugger.
                similar = set(tags) & set(prediction)
        finally:
            MYSQL.close()

    else:
        import cv2
        import numpy as np
        from PIL import Image
        from random import choices

        candidates = list(path.glob('[0-9a-f]/[0-9a-f]/*jpg'))
        if not candidates:
            # random.choices raises IndexError on an empty population.
            return

        for image in choices(candidates, k=num):
            prediction = model.predict(image)

            # Force three channels so the RGB->BGR conversion below cannot
            # fail on grayscale or CMYK JPEGs.
            with Image.open(image) as handle:
                image_ = np.array(handle.convert('RGB'))
            image_ = cv2.cvtColor(image_, cv2.COLOR_RGB2BGR)

            # cv2.imshow requires a string window name.
            if isinstance(prediction, str):
                title = prediction
            else:
                title = ' '.join(map(str, prediction))
            cv2.imshow(title, image_)
            cv2.waitKey(0)
def Artist_statistics():
    """Print a star-rating summary for artists with more than 100 images.

    For each selected artist this prints the number of rated images, the
    sum of their stars, and the ratio of that sum to the maximum possible
    (five stars per rated image). Artists with no rated images are skipped.
    """
    from Webscraping import CONNECT

    SELECT = 'SELECT artist FROM imagedata GROUP BY artist HAVING COUNT(artist) > 100 ORDER BY artist'
    STATS = (
        'SELECT ( '
        'SELECT COUNT(*) FROM imagedata '
        'WHERE MATCH(tags, artist) AGAINST(%s IN BOOLEAN MODE) AND stars '
        ') AS TOTAL, ( '
        'SELECT SUM(stars) FROM imagedata '
        'WHERE MATCH(tags, artist) AGAINST(%s IN BOOLEAN MODE) '
        ') AS STARS '
    )

    MYSQL = CONNECT()
    try:
        # The first group in artist order is skipped -- presumably the
        # blank artist; TODO confirm. At most 100 artists are reported.
        for artist, in MYSQL.execute(SELECT, fetch=1)[1:101]:

            rows = MYSQL.execute(STATS, (artist, artist), fetch=1)
            if not rows:
                continue
            # execute() returns a list of rows; the statistics are the two
            # columns of the single row, not the list itself.
            total, stars = rows[0]

            # No rated images: SUM(stars) is NULL and the ratio is undefined.
            if not total or stars is None:
                continue

            print(f'{artist.strip():<25} (Num: {total:>4}, Stars: {stars:>4}): {stars / (total*5):>4.2%}')
    finally:
        MYSQL.close()
def unnamed(path):
    """Delete files under *path* that are already stored in the library.

    A file is removed when exactly one ``imagedata`` row matches its hash
    or file name, that row has a non-NULL path, and the corresponding
    library file exists on disk.

    Args:
        path: Directory to scan recursively.

    Returns:
        The number of files deleted.
    """
    from Webscraping import CONNECT, utils
    from pathlib import Path

    SELECT = 'SELECT path FROM imagedata WHERE hash=%s OR path=%s'
    path = Path(path)
    num = 0

    MYSQL = CONNECT()
    try:
        for file in path.glob('**/*.*'):

            # The pattern also matches directories whose name contains a dot.
            if not file.is_file():
                continue

            hash_ = utils.get_hash(file)
            target = MYSQL.execute(SELECT, (hash_, file.name), fetch=1)

            # Only act on an unambiguous match with a known path.
            if len(target) != 1:
                continue
            stored = target[0][0]
            if stored is None:
                continue

            p = utils.PATH / stored[:2] / stored[2:4] / stored

            # Never delete the library's own copy when *path* overlaps it.
            if p.exists() and not p.samefile(file):
                file.unlink()
                num += 1
    finally:
        MYSQL.close()

    return num
def remove_redundancies():
    """De-duplicate the words in every row's ``artist`` and ``tags`` columns.

    Hyphens are replaced with underscores, repeated words are dropped
    (keeping first-occurrence order), and the result is stored padded with
    a single leading and trailing space. Rows that are already normalized
    are left untouched.
    """
    from Webscraping import CONNECT

    def normalize(words):
        """Return the normalized form of a space-separated word string."""
        if words is None:
            # Leave NULL columns as NULL rather than crashing on .split().
            return None
        # Replace hyphens *before* de-duplicating so 'a-b' and 'a_b'
        # collapse into one word; dict.fromkeys keeps a stable order, so
        # repeated runs do not reshuffle the column.
        unique = dict.fromkeys(words.replace('-', '_').split())
        return f' {" ".join(unique)} '

    SELECT = 'SELECT path, artist, tags FROM imagedata WHERE NOT ISNULL(path)'
    UPDATE = 'UPDATE imagedata SET artist=%s, tags=%s WHERE path=%s'

    MYSQL = CONNECT()
    try:
        for path, artist, tags in MYSQL.execute(SELECT, fetch=1):

            new_artist = normalize(artist)
            new_tags = normalize(tags)

            # Skip the write when nothing would change.
            if new_artist == artist and new_tags == tags:
                continue

            MYSQL.execute(UPDATE, (new_artist, new_tags, path))

        MYSQL.commit()
    finally:
        MYSQL.close()
def Find_symmetric_videos():
    """Print the path of every silent animation whose frames are palindromic.

    A clip counts as symmetric when its frame sequence reads the same
    forwards and backwards, allowing for one extra frame at either end.
    For odd-length clips the middle frame is ignored first.
    """
    from cv2 import VideoCapture
    from Webscraping import CONNECT

    def compare(seq):
        """Return True when *seq* mirrors itself around its centre."""
        half = len(seq) // 2
        if not half:
            # No pair to compare: not meaningfully symmetric.
            return False
        # Every mirrored pair must match. The previous version returned
        # the list of per-pair results, which is truthy whenever it is
        # non-empty, so almost every clip was reported.
        return all(
            (left == right).all()
            for left, right in zip(seq[:half], reversed(seq))
        )

    def symmetric(seq):
        """Return True when *seq* is a palindrome, give or take an end frame."""
        seq = list(seq)  # work on a copy; do not mutate the caller's list
        if len(seq) % 2 != 0:
            del seq[len(seq) // 2]
        return any(
            compare(candidate)
            for candidate in (seq[1:], seq, seq[:-1])
        )

    SELECT = 'SELECT path FROM imagedata WHERE MATCH(tags, artist) AGAINST("animated -audio" IN BOOLEAN MODE) AND type=0'

    MYSQL = CONNECT()
    try:
        rows = MYSQL.execute(SELECT, fetch=1)
    finally:
        MYSQL.close()

    for path, in rows:

        frames = []
        vidcap = VideoCapture(path)
        try:
            success, frame = vidcap.read()
            while success:
                frames.append(frame)
                success, frame = vidcap.read()
        finally:
            # Free the decoder before moving on to the next file.
            vidcap.release()

        if symmetric(frames):
            print(path)
def Remove_Intro(path):
    """Trim the low-contrast lead-in from a video and update its stored hash.

    Frames are read until one has an average local contrast above 5. The
    video is then re-encoded starting from that position, written back
    over *path*, and the ``hash`` column of the matching ``imagedata`` row
    is updated. If the video cannot be read, or no frame exceeds the
    threshold, the file and database are left untouched.

    Args:
        path: Location of the video file; its file name identifies the
            database row.
    """
    import tempfile

    import cv2
    import ffmpeg
    import numpy as np
    from pathlib import Path
    from Webscraping import CONNECT
    from Webscraping.utils import get_hash

    def contrast(img, k_size=25):
        """Return 100x the mean local contrast of the lightness channel."""
        # Work on the L channel of the LAB colour space.
        lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
        lightness, _, _ = cv2.split(lab)

        # Erode/dilate give the local minimum/maximum over a
        # k_size x k_size neighbourhood.
        kernel = np.ones((k_size, k_size), np.uint8)
        low = cv2.erode(lightness, kernel, iterations=1).astype(np.float64)
        high = cv2.dilate(lightness, kernel, iterations=1).astype(np.float64)

        local = (high - low) / (high + (low + 1))
        return 100 * np.mean(local)

    np.seterr(divide='ignore', invalid='ignore')
    UPDATE = 'UPDATE imagedata SET hash=%s WHERE path=%s'
    path = Path(path)

    msec = None
    vidcap = cv2.VideoCapture(str(path))
    try:
        success, frame = vidcap.read()
        while success:
            # NOTE(review): CAP_PROP_POS_MSEC is in milliseconds, so the
            # extra "/ 100" here plus "/ 1000" below looks like a scaling
            # mistake. Preserved because this function overwrites the
            # source file -- confirm the intended unit before changing.
            msec = vidcap.get(cv2.CAP_PROP_POS_MSEC) / 100
            if contrast(frame) > 5:
                break
            success, frame = vidcap.read()
    finally:
        # Release the handle before the file is overwritten below.
        vidcap.release()

    if not success:
        # Unreadable video, or no frame ever exceeded the threshold:
        # there is no cut point, so leave everything alone.
        return

    sec = msec / 1000

    SQL = CONNECT()
    try:
        # A private temporary directory avoids clashing with an existing
        # file of the same name and is cleaned up automatically.
        with tempfile.TemporaryDirectory() as workdir:
            temp = Path(workdir, path.name)

            ffmpeg.input(str(path)) \
                .trim(start=sec) \
                .setpts('PTS-STARTPTS') \
                .output(str(temp)) \
                .run()

            SQL.execute(UPDATE, (get_hash(temp), path.name))
            path.write_bytes(temp.read_bytes())
            SQL.commit()
    finally:
        SQL.close()
def Normalize_Database():
    """Reconcile the ``imagedata`` table with the files in the library folder.

    Prints how many database paths have no file and how many files have no
    database row, then asks for confirmation. On confirmation:

    * rows whose file is missing have their path set to NULL when they
      still carry a ``src`` or ``href``, and are deleted otherwise;
    * files without a row are trashed when they are a "name (n).ext" copy
      of an existing file, and otherwise moved to the reserve folder.
    """
    import re
    import send2trash
    from pathlib import Path
    from Webscraping import CONNECT, USER, utils

    MYSQL = CONNECT()
    DROPBOX = USER / r'Dropbox\ん'
    RESERVE = r'Downloads\Reserve'

    # Path components are passed to the SQL full_path() function as
    # quoted string arguments.
    parts = ", ".join([f"'{part}'" for part in DROPBOX.parts]).replace('\\', '')
    SELECT = f'SELECT full_path(path, {parts}) FROM imagedata WHERE NOT ISNULL(path)'
    select = 'SELECT src, href FROM imagedata WHERE path=%s'
    UPDATE = 'UPDATE imagedata SET path=NULL WHERE path=%s'
    DELETE = 'DELETE FROM imagedata WHERE path=%s'

    database = set(
        Path(path) for path, in MYSQL.execute(SELECT, fetch=1)
    )
    windows = set(DROPBOX.glob('[0-9a-f][0-9a-f]/[0-9a-f][0-9a-f]/*.*'))
    x, y = database - windows, windows - database

    print(f'{len(x)} not in files')
    print(f'{len(y)} not in database')
    if input('Go through with deletes? ').lower() not in ('y', 'ye', 'yes'):
        return

    # --- rows whose file is gone ---------------------------------------
    for file in x:
        rows = MYSQL.execute(select, (file.name,), fetch=1)
        # Keep the row (minus its path) while any match still has a source
        # to re-download from. Works for zero, one or several rows, where
        # any(*rows) raised TypeError for anything but exactly one.
        if any(any(row) for row in rows):
            MYSQL.execute(UPDATE, (file.name,))
        else:
            MYSQL.execute(DELETE, (file.name,))
    print(f'{len(x)} records deleted')

    MYSQL.commit()

    # --- files with no row ---------------------------------------------
    SELECT = 'SELECT path FROM imagedata WHERE hash=%s OR path=%s'
    numbered_copy = re.compile(r'.+ \(.+\)\..+')
    copy_suffix = re.compile(r' \(.+\)')

    for file in y:

        # "name (1).ext" next to "name.ext" is a duplicate download.
        if numbered_copy.match(file.name):
            clean = copy_suffix.sub('', file.name)
            if file.with_name(clean).exists():
                send2trash.send2trash(str(file))
                continue

        hash_ = utils.get_hash(file)
        name = utils.get_name(file)

        image = MYSQL.execute(SELECT, (hash_, name.name), fetch=1)
        if image:
            try:
                # NOTE(review): *file* came from the glob above, so this
                # test is normally false and the file is moved to the
                # reserve; `name.exists()` may have been intended. Left
                # unchanged pending confirmation.
                if not file.exists():
                    file.rename(name)
                else:
                    file.replace(USER / RESERVE / file.name)
            except FileExistsError:
                send2trash.send2trash(str(file))

        else:
            try:
                file.replace(USER / RESERVE / file.name)
            except OSError:
                # Best effort: leave files that cannot be moved in place.
                continue

    print(f'{len(y)} files moved')
def siamese():
    """Build and display a montage of positive/negative image pairs.

    Loads up to 100 random ``.jpg`` rows of type 1 or 2, reduces each image
    to a small grayscale thumbnail, pairs every training image with one
    image of the same type (positive) and one of the other type (negative),
    and shows a 7x7 montage of 49 randomly chosen pairs.
    """
    import numpy as np
    import cv2
    from Webscraping import CONNECT
    from imutils import build_montages
    from PIL import Image

    SELECT = 'SELECT type+0, path FROM imagedata WHERE type in (1, 2) AND path LIKE "%.jpg" ORDER BY RAND() LIMIT 100'

    # Thumbnail edge length; the montage layout below is derived from it.
    side = 28

    def make_pairs(images, labels, pairs=1):
        """Return (pairs, flags): flag 1 = same label, flag 0 = different."""
        labels = np.asarray(labels)
        pairImages = []
        pairLabels = []

        # Index examples by their actual label value. Labels here are 1
        # and 2, so positional indexing by label (0..n-1) does not work.
        by_label = {
            int(value): np.flatnonzero(labels == value)
            for value in np.unique(labels)
        }

        for idxA, currentImage in enumerate(images):
            label = int(labels[idxA])

            # positive pair: a random image with the same label
            posImage = images[np.random.choice(by_label[label])]
            pairImages.append([currentImage, posImage])
            pairLabels.append([1])

            # negative pair: a random image with any other label
            negIdx = np.flatnonzero(labels != label)
            if negIdx.size == 0:
                # Only one class in this split; no negative exists.
                continue
            negImage = images[np.random.choice(negIdx)]
            pairImages.append([currentImage, negImage])
            pairLabels.append([0])

        return (np.array(pairImages), np.array(pairLabels))

    def load_data(split=7):
        """Return ((trainX, trainY), (testX, testY)) from the database."""
        MYSQL = CONNECT()
        try:
            rows = MYSQL.execute(SELECT, fetch=1)
        finally:
            MYSQL.close()

        labels, images = [], []
        for kind, file in rows:
            # NOTE(review): assumes the stored path can be opened as-is --
            # confirm whether it needs resolving against the library root.
            with Image.open(file) as handle:
                # Uniform grayscale thumbnails stack into one array and
                # match the fixed-size montage layout.
                thumb = handle.convert('L').resize((side, side))
                images.append(np.array(thumb, dtype='uint8'))
            labels.append(int(kind))

        images = np.array(images, dtype='uint8').reshape(-1, side, side)
        labels = np.array(labels, dtype=int)

        # The first 1/split of the sample is held out for testing.
        length = len(labels) // split
        return (
            (images[length:], labels[length:]),
            (images[:length], labels[:length]),
        )

    print("[INFO] loading dataset...")
    (trainX, trainY), (testX, testY) = load_data()

    print("[INFO] preparing positive and negative pairs...")
    (pairTrain, labelTrain) = make_pairs(trainX, trainY)
    (pairTest, labelTest) = make_pairs(testX, testY)

    if len(pairTrain) == 0:
        # Nothing was loaded; there is nothing to display.
        return

    # images that will make up the montage
    images = []

    # sample 49 training pairs (with replacement) for a 7x7 grid
    for i in np.random.choice(np.arange(0, len(pairTrain)), size=(49, )):

        imageA = pairTrain[i][0]
        imageB = pairTrain[i][1]
        label = labelTrain[i]

        # pad the side-by-side pair with four pixels along the top,
        # bottom and right borders
        output = np.zeros((side + 8, 2 * side + 4), dtype="uint8")
        pair = np.hstack([imageA, imageB])
        output[4:4 + side, 0:2 * side] = pair

        # red "neg" for a negative pair, green "pos" for a positive one
        text = "neg" if label[0] == 0 else "pos"
        color = (0, 0, 255) if label[0] == 0 else (0, 255, 0)

        # expand to three channels, enlarge for readability and annotate
        vis = cv2.merge([output] * 3)
        vis = cv2.resize(vis, (96, 51), interpolation=cv2.INTER_LINEAR)
        cv2.putText(vis, text, (2, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.75,
            color, 2)

        images.append(vis)

    # build and show the montage
    montage = build_montages(images, (96, 51), (7, 7))[0]

    cv2.imshow("Siamese Image Pairs", montage)
    cv2.waitKey(0)