def usfaces_df(queue):
    """Preprocess and augment the 10k US Adult Faces Database into data/.

    Reads the attribute spreadsheet, keeps one "Attractive" score per
    filename and, for every listed face, writes a resized copy plus a
    horizontally flipped ("-F.jpg") and a noisy ("-N.jpg") variant under
    data/. Faces whose preprocessing raises are left out of the result.

    Args:
        queue: Object exposing ``put``; it receives the resulting dataframe
            with columns "Face" and "Rating". Ratings are multiplied by
            10/5 (presumably 5-point scale -> 10-point scale).

    Returns:
        None. The dataframe is delivered through ``queue``.
    """
    ratings = pd.read_excel(
        "Full Attribute Scores/demographic & others labels/demographic-others-labels.xlsx"
    )
    ratings = ratings[["Filename", "Attractive"]]
    ratings = ratings.drop_duplicates(subset=["Filename"])

    # Collect failures and filter once afterwards instead of rebuilding the
    # dataframe for every failing image.
    failed = set()
    for face in ratings["Filename"]:
        base = os.path.splitext(face)[0]
        resized = "data/{0}".format(face)
        try:
            preprocess.resize(
                "10k US Adult Faces Database/Face Images/{0}".format(face), resized
            )
            preprocess.hflip(resized, "data/{0}-F.jpg".format(base))
            preprocess.add_noise(resized, "data/{0}-N.jpg".format(base))
        except Exception:
            # Best effort: skip images that cannot be preprocessed, but let
            # KeyboardInterrupt/SystemExit propagate so the job can be stopped.
            failed.add(face)
    ratings = ratings[~ratings["Filename"].isin(list(failed))].copy()

    # Derive augmented names with the same splitext rule used when the files
    # were written, so names and files agree for any extension length.
    bases = [os.path.splitext(name)[0] for name in ratings["Filename"]]
    flipped_df = ratings.copy()
    noisy_df = ratings.copy()
    flipped_df["Filename"] = [base + "-F.jpg" for base in bases]
    noisy_df["Filename"] = [base + "-N.jpg" for base in bases]

    df = pd.concat([ratings, flipped_df, noisy_df], ignore_index=True)
    # Rename Filename -> Face and Attractive -> Rating
    df.columns = ["Face", "Rating"]
    df["Rating"] *= 10.0 / 5.0
    queue.put(df)
def scutfbp_df(queue):
    """Preprocess and augment SCUT-FBP faces into data/.

    Reads the attractiveness spreadsheet, then for every ``.jpg`` in
    Data_Collection writes a resized copy plus a horizontally flipped
    ("-F.jpg") and a noisy ("-N.jpg") variant under data/. Images whose
    preprocessing raises are removed from the ratings by image number.

    Args:
        queue: Object exposing ``put``; it receives the resulting dataframe
            with columns "Face" and "Rating" (ratings converted from the
            5-point scale to a 10-point scale).

    Returns:
        None. The dataframe is delivered through ``queue``.
    """
    ratings = pd.read_excel("Rating_Collection/Attractiveness label.xlsx")
    # Image numbers are compared with digits parsed from filenames, so keep
    # the #Image column as str.
    ratings["#Image"] = ratings["#Image"].astype(str)
    # Drop column Standard Deviation (axis passed by keyword: the positional
    # form is not accepted by recent pandas releases).
    ratings = ratings.drop("Standard Deviation", axis=1)

    # Matches the run of digits at the end of a filename stem.
    trailing_number = re.compile(r".*?([0-9]+)$")
    failed = set()
    for face in os.listdir("Data_Collection"):
        if not face.endswith(".jpg"):
            continue
        base = os.path.splitext(face)[0]
        match = trailing_number.match(base)
        if match is None:
            # No image number in the name, so it cannot be tied to a rating.
            continue
        resized = "data/{0}".format(face)
        try:
            preprocess.resize("Data_Collection/{0}".format(face), resized)
            preprocess.hflip(resized, "data/{0}-F.jpg".format(base))
            preprocess.add_noise(resized, "data/{0}-N.jpg".format(base))
        except Exception:
            # Best effort: drop the image, but do not swallow
            # KeyboardInterrupt/SystemExit.
            failed.add(match.group(1))
    ratings = ratings[~ratings["#Image"].isin(list(failed))].copy()

    prefixed = "SCUT-FBP-" + ratings["#Image"]
    flipped_df = ratings.copy()
    noisy_df = ratings.copy()
    flipped_df["#Image"] = prefixed + "-F.jpg"
    noisy_df["#Image"] = prefixed + "-N.jpg"
    ratings["#Image"] = prefixed + ".jpg"

    df = pd.concat([ratings, flipped_df, noisy_df], ignore_index=True)
    # Rename #Image -> Face and Attractiveness label -> Rating
    df.columns = ["Face", "Rating"]
    # Convert from 5 point scale to 10 point scale
    df["Rating"] *= 10.0 / 5.0
    queue.put(df)
def models_df(queue):
    """Download, preprocess and augment models.com "new faces" into data/.

    Walks listing pages 1..215, downloads every ``img`` tag with class
    "attachment-square" under a random hex filename, and writes a resized
    copy plus a horizontally flipped ("-F.jpg") and a noisy ("-N.jpg")
    variant under data/. Every image that preprocesses successfully is
    recorded with a fixed rating of 10.

    Args:
        queue: Object exposing ``put``; it receives the resulting dataframe
            with columns "Face" and "Rating".

    Returns:
        None. The dataframe is delivered through ``queue``.

    Note:
        Network errors from fetching a page or an image are not caught here
        and abort the whole run.
    """
    imgs = []
    for i in range(1, 216):
        url = "http://models.com/newfaces/page/{0}".format(i)
        response = urllib2.urlopen(url)
        try:
            page = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        soup = BeautifulSoup(page, "lxml")
        for tag in soup.findAll("img", {"class": "attachment-square"}):
            # NOTE(review): prefixing "http:" suggests src is protocol-relative
            # ("//host/...") -- confirm against the live markup.
            src = "http:{0}".format(tag["src"])
            base = uuid.uuid4().hex
            filename = base + ".jpg"
            original = "data/{0}".format(filename)
            flipped = "data/{0}-F.jpg".format(base)
            noisy = "data/{0}-N.jpg".format(base)
            urllib.urlretrieve(src, original)
            try:
                preprocess.resize(original, original)
                preprocess.hflip(original, flipped)
                preprocess.add_noise(original, noisy)
            except Exception:
                # Remove the download and any variant already written so no
                # unlabelled files are left behind in data/.
                for path in (original, flipped, noisy):
                    if os.path.exists(path):
                        os.remove(path)
                continue
            imgs.append({"Face": filename, "Rating": 10})
            imgs.append({"Face": "{0}-F.jpg".format(base), "Rating": 10})
            imgs.append({"Face": "{0}-N.jpg".format(base), "Rating": 10})
    # Explicit columns keep the frame shape stable even when nothing was scraped.
    df = pd.DataFrame(imgs, columns=["Face", "Rating"])
    queue.put(df)
def chicago_df(queue):
    """Preprocess and augment Chicago Face Database faces into data/.

    Reads the norming spreadsheet (skipping its first four rows), then for
    every target directory processes the images whose name ends in "N.jpg"
    (neutral faces): a resized copy is written as data/<target>.jpg together
    with a horizontally flipped ("-F.jpg") and a noisy ("-N.jpg") variant.
    Targets whose preprocessing raises are removed from the ratings.

    Args:
        queue: Object exposing ``put``; it receives the resulting dataframe
            with columns "Face" and "Rating" (ratings converted from the
            7-point scale to a 10-point scale).

    Returns:
        None. The dataframe is delivered through ``queue``.
    """
    ratings = pd.read_excel(
        "CFD Version 2.0/CFD 2.0 Norming Data and Codebook.xlsx", skiprows=4
    )
    ratings = ratings[["Target", "Attractive"]]

    images_root = "CFD Version 2.0/CFD 2.0 Images"
    failed = set()
    for target in os.listdir(images_root):
        target_dir = "{0}/{1}".format(images_root, target)
        if not os.path.isdir(target_dir):
            # Stray files such as .DS_Store are not target folders.
            continue
        resized = "data/{0}.jpg".format(target)
        for face in os.listdir(target_dir):
            # Neutral faces
            if not face.endswith("N.jpg"):
                continue
            try:
                preprocess.resize("{0}/{1}".format(target_dir, face), resized)
                preprocess.hflip(resized, "data/{0}-F.jpg".format(target))
                preprocess.add_noise(resized, "data/{0}-N.jpg".format(target))
            except Exception:
                # Best effort: drop the target, but do not swallow
                # KeyboardInterrupt/SystemExit.
                failed.add(target)
    ratings = ratings[~ratings["Target"].isin(list(failed))].copy()

    flipped_df = ratings.copy()
    noisy_df = ratings.copy()
    flipped_df["Target"] = flipped_df["Target"] + "-F.jpg"
    noisy_df["Target"] = noisy_df["Target"] + "-N.jpg"
    ratings["Target"] = ratings["Target"] + ".jpg"

    df = pd.concat([ratings, flipped_df, noisy_df], ignore_index=True)
    # Rename Target -> Face and Attractive -> Rating
    df.columns = ["Face", "Rating"]
    # Convert from 7 point scale to 10 point scale
    df["Rating"] *= 10.0 / 7.0
    queue.put(df)
def eccv_df(queue):
    """Preprocess and augment the Gray et al. (ECCV 2010) dataset into data/.

    Iterates the entries of hotornot_face_all.xml; for each one writes a
    resized (uncropped) copy as data/<base>.jpg plus a horizontally flipped
    ("-F.jpg") and a noisy ("-N.jpg") variant. Entries whose preprocessing
    raises are skipped.

    Args:
        queue: Object exposing ``put``; it receives the resulting dataframe
            with columns "Face" and "Rating". Ratings are shifted by +4 and
            multiplied by 10/8 (presumably mapping scores in [-4, 4] onto a
            10-point scale -- confirm against the dataset description).

    Returns:
        None. The dataframe is delivered through ``queue``.
    """
    root = ET.parse("eccv2010_beauty_data/hotornot_face_all.xml").getroot()
    rows = []
    for child in root:
        source = child.attrib["filename"]
        base = os.path.splitext(os.path.split(source)[-1])[0]
        # The resized image is always written as <base>.jpg; the augmentation
        # steps must read that same file, whatever the source extension was.
        resized = "data/{0}.jpg".format(base)
        try:
            preprocess.resize(
                "eccv2010_beauty_data/{0}".format(source),
                resized,
                crop=False,
            )
            preprocess.hflip(resized, "data/{0}-F.jpg".format(base))
            preprocess.add_noise(resized, "data/{0}-N.jpg".format(base))
        except Exception:
            # Best effort: skip the entry, but do not swallow
            # KeyboardInterrupt/SystemExit.
            continue
        score = float(child.attrib["score"])
        rows.append([base + "-F.jpg", score])
        rows.append([base + "-N.jpg", score])
        rows.append([base + ".jpg", score])
    df = pd.DataFrame(rows, columns=["Face", "Rating"])
    df["Rating"] += 4
    df["Rating"] *= 10.0 / 8.0
    queue.put(df)