def select_best_candidates(entry, filenames):
    """Return the filenames whose normalized key best matches the entry.

    Every filename is scored with the Jaro distance between its
    normalized key and the entry's key.  All filenames sharing the top
    score are returned; if even the best score is below 0.75 the match
    is considered too weak and an empty list is returned.
    """
    entry_key = asmmetadata.get_entry_key(entry)
    scored = []
    best_score = 0.0
    for candidate in filenames:
        # Normalize the filename the same way entry keys are normalized.
        candidate_key = normalize_remove_numeric_prefix(candidate)
        candidate_key = normalize_remove_suffix(candidate_key)
        candidate_key = asmmetadata.normalize_key(candidate_key)
        score = pyjarowinkler.distance.get_jaro_distance(
            entry_key, candidate_key)
        if score > best_score:
            best_score = score
        scored.append((candidate, score))
    if best_score < 0.75:
        return []
    return [candidate for candidate, score in scored if score == best_score]
import os.path import subprocess import sys target_directory = sys.argv[1] if not os.path.exists(target_directory): print "Target directory %s does not exist!" % target_directory sys.exit(1) entry_data = asmmetadata.parse_file(sys.stdin) for entry in entry_data.entries: if 'sceneorg' not in entry: continue sceneorg_path = entry['sceneorg'] section_directory = entry['section']['key'].replace("-", "_") file_directory = asmmetadata.normalize_key("%s by %s" % (entry['title'], entry['author'])).replace("-", "_") download_directory = os.path.join( target_directory, section_directory, file_directory, ) if not os.path.exists(download_directory): os.makedirs(download_directory) download_file = os.path.join( download_directory, os.path.basename(sceneorg_path) ) download_url = "ftp://ftp.scene.org/pub%s" % sceneorg_path subprocess.call(['wget', '-O', download_file, download_url])
def print_entry(year, entry, description_generator):
    """Print an XML <externalasset> record (plus any extra image assets)
    for one compo entry to stdout.

    Prints nothing and returns early when the entry has no media
    locations or no usable thumbnail.  `description_generator` is
    called as description_generator(entry, position_str) to produce the
    lead paragraph for non-seminar sections.
    """
    title = entry['title']
    author = entry['author']
    section_name = entry['section']['name']
    name = asmmetadata.get_entry_name(entry)
    normalized_name = asmmetadata.get_entry_key(entry)
    normalized_section = asmmetadata.normalize_key(section_name)
    # Position 0 means "unranked".
    position = entry.get('position', 0)
    extra_assets = ""
    locations = ""
    description = u""
    if 'warning' in entry:
        # Warnings come first; "</p>\n<p>" closes the paragraph so the
        # next fragment starts a fresh one.
        description += u"%s</p>\n<p>" % cgi.escape(entry['warning'])
    position_str = None
    if entry["section"].get("ranked", True):
        if position != 0:
            # E.g. "1st place", "2nd place".
            position_str = str(position) + asmmetadata.get_ordinal_suffix(position) + " place"
    has_media = False
    display_author = None
    # Author display rules: Misc/Photos sections never show an author;
    # AssemblyTV and Winter sections hide it as well.
    if "Misc" in section_name or "Photos" in section_name:
        pass
    elif not "AssemblyTV" in section_name and not "Winter" in section_name:
        display_author = author
    if not "Seminars" in section_name:
        description += description_generator(entry, position_str)
    if 'description' in entry:
        description += u"%s</p>\n<p>" % cgi.escape(entry['description'])
    if 'platform' in entry:
        description += u"Platform: %s</p>\n<p>" % cgi.escape(entry['platform'])
    if 'techniques' in entry:
        description += u"Notes: %s</p>\n<p>" % cgi.escape(entry['techniques'])
    if display_author is not None:
        description += u"Author: %s\n" % cgi.escape(display_author)

    # Youtube is our primary media location.
    if "youtube" in entry:
        youtube_id_time = asmmetadata.get_timed_youtube_id(entry)
        has_media = True
        locations += "<location type='youtube'>%s</location>" % youtube_id_time

    # DemosceneTV is a secondary video location.
    demoscenetv = entry.get('dtv')
    if demoscenetv:
        has_media = True
        locations += "<location type='demoscenetv'>%s</location>" % (escape(demoscenetv))

    # XXX prevent the creation of humongous files.
    # if 'galleriafi' in entry:
    #     return

    # Image asset: either an explicit 'image-file' or the default
    # <section>/<name>.jpeg location used for galleria.fi photos.
    if entry.get('image-file') or entry.get('galleriafi'):
        image_file = entry.get('image-file')
        if image_file is None:
            image_file = "%s/%s.jpeg" % (normalized_section, normalized_name)
        if asmmetadata.is_image(image_file):
            has_media = True
            baseprefix, _ = image_file.split(".")
            # Pick the smaller of the available large-thumbnail variants.
            viewfile, postfix = select_smaller_thumbnail(os.path.join(FILEROOT, 'thumbnails/large/%s' % baseprefix))
            normal_prefix = asmmetadata.normalize_key(baseprefix)
            image_filename = normal_prefix + postfix
            locations += "<location type='image'>%s|%s</location>" % (image_filename, escape(name))
            extra_assets += display_asset(
                "%d/%s/%s/%s" % (year, normalized_section, normalized_name, image_filename),
                name,
                viewfile)

    # 'webfile' can be either an image (gets a full-resolution download
    # link plus an image location) or an MP3 (download link only).
    webfile = entry.get('webfile')
    if webfile:
        if asmmetadata.is_image(webfile):
            has_media = True
            baseprefix, _ = webfile.split(".")
            viewfile, postfix = select_smaller_thumbnail(os.path.join(FILEROOT, 'thumbnails/large/%s' % baseprefix))
            normal_prefix = asmmetadata.normalize_key(baseprefix)
            image_filename = normal_prefix + postfix
            locations += "<location type='download'>http://media.assembly.org/compo-media/assembly%d/%s|Full resolution</location>" % (year, webfile)
            locations += "<location type='image'>%s|%s</location>" % (image_filename, escape(name))
            extra_assets += display_asset(
                "%d/%s/%s/%s" % (year, normalized_section, normalized_name, image_filename),
                name,
                viewfile)
        elif webfile.endswith(".mp3"):
            locations += "<location type='download'>http://media.assembly.org/compo-media/assembly%d/%s|MP3</location>" % (year, webfile)

    pouet = entry.get('pouet')
    if pouet:
        locations += "<location type='pouet'>%s</location>" % (pouet)

    download = entry.get('download')
    if download:
        download_type = "Original"
        if "game" in section_name.lower():
            download_type = "Playable game"
        locations += "<location type='download'>%s|%s</location>" % (escape(download), download_type)

    # scene.org downloads; a semicolon-separated value means a multi-part
    # release and each part is numbered "(i/n)".
    sceneorg = entry.get('sceneorg')
    if sceneorg:
        download_type = "Original"
        if "game" in section_name.lower():
            download_type = "Playable game"
        if ";" in sceneorg:
            parts = sceneorg.split(";")
            i = 1
            for part in parts:
                locations += "<location type='sceneorg'>%s|%s (%d/%d)</location>" % (
                    escape(part), download_type, i, len(parts))
                i += 1
        else:
            locations += "<location type='sceneorg'>%s|%s</location>" % (escape(sceneorg), download_type)

    # HQ video: prefer scene.org over the local media mirror.
    sceneorgvideo = entry.get('sceneorgvideo')
    mediavideo = entry.get('media')
    if sceneorgvideo:
        locations += "<location type='sceneorg'>%s|HQ video</location>" % (escape(sceneorgvideo))
    elif mediavideo:
        locations += "<location type='download'>http://media.assembly.org%s|HQ video</location>" % (mediavideo)

    galleriafi = entry.get("galleriafi")
    if galleriafi:
        locations += "<location type='download'>http://assembly.galleria.fi%s|Original image</location>" % (galleriafi)

    # Entries with no media location at all are skipped entirely.
    if not has_media:
        return

    # A thumbnail is required: either inherited from the parent asset or
    # produced by get_thumbnail_data().
    has_thumbnail = False
    if entry.get('use-parent-thumbnail', False) is True:
        has_thumbnail = True
    else:
        thumbnail_data = get_thumbnail_data(entry)
        if thumbnail_data is not None:
            has_thumbnail = True
    if not has_thumbnail:
        return

    # ranking attribute only for ranked (non-zero position) entries.
    ranking = 'ranking="%d"' % position
    if position == 0:
        ranking = ''
    # NOTE(review): despite the name this is a plain alias; no encoding
    # happens here.
    description_non_unicode = description
    tags = set()
    entry_tags = entry.get('tags')
    if entry_tags:
        tags.update(entry_tags.split(" "))

    # NOTE(review): the XML templates below are kept on single lines as
    # they appear in this source; confirm original line breaks from VCS.
    if entry.get('use-parent-thumbnail', False) is False:
        # thumbnail_data was fetched above in this branch; the image
        # bytes are embedded base64-encoded inside CDATA.
        thumbnail_asset = """ <asset path="%(year)s/%(normalizedsection)s/%(normalizedname)s/thumbnail"> <edition parameters="lang: workflow:public" title=%(title)s tags="hide-search" created="%(current-time)s" modified="%(current-time)s"><![CDATA[%(data)s ]]></edition> </asset> """ % {
            'year': year,
            'normalizedsection': normalized_section,
            'normalizedname': normalized_name,
            'data': base64.encodestring(thumbnail_data),
            'title': quoteattr(title),
            'current-time': CURRENT_TIME,
            }
    else:
        thumbnail_asset = ''

    asset_data = """ <externalasset path="%(year)s/%(normalizedsection)s/%(normalizedname)s"> <edition parameters="lang: workflow:public" title=%(title)s tags=%(tags)s created="%(current-time)s" modified="%(current-time)s"> <mediagalleryadditionalinfo author=%(author)s description=%(description)s %(ranking)s></mediagalleryadditionalinfo> %(locations)s </edition> </externalasset> %(thumbnail)s """ % {
        'year': year,
        'normalizedsection': normalized_section,
        'normalizedname': normalized_name,
        'title': quoteattr(title),
        'author': quoteattr(author),
        'ranking': ranking,
        'thumbnail': thumbnail_asset,
        'locations': locations,
        'description': quoteattr(description_non_unicode),
        'current-time': CURRENT_TIME,
        'tags': quoteattr(" ".join(tags)),
        }

    # Output is UTF-8 bytes on stdout.
    asset_data_str = asset_data.encode("utf-8")
    print asset_data_str
    extra_assets_str = extra_assets.encode("utf-8")
    print extra_assets_str
# Emit download commands and metadata lines for every image found in a
# galleria.fi folder listing.  Titles are de-duplicated by appending a
# numeric suffix, since several photos may share the same caption.
if author == "":
    author = "unknown"
known_titles = set()
# FIX: the old cmp-style `sorted(files.items(), lambda x, y: cmp(x[0], y[0]))`
# is Python-2-only and bound an unused value; sorting the keys directly
# yields the identical (ascending path) order.
for image_path in sorted(files):
    image_name = image_path.replace(folder_key, "")
    title = urllib.unquote_plus(image_name)
    # De-duplicate case-insensitively: "Title", "Title-2", "Title-3", ...
    next_id = 2
    new_title = title
    while new_title.lower() in known_titles:
        new_title = "%s-%d" % (title, next_id)
        next_id += 1
    title = new_title
    known_titles.add(title.lower())
    filename = asmmetadata.normalize_key(
        "%s by %s" % (title, author)) + ".jpeg"
    # One wget invocation per image; ?img=full requests the original.
    print_shell(
        "wget -nc --no-host '%s://%s%s?img=full' -O '%s'/%s" % (
            parsed_url.scheme, parsed_url.netloc, image_path,
            args.photos_root, filename))
    image_file = "%s/%s" % (photo_category, filename)
    # Pipe-separated metadata record, UTF-8 encoded.
    print_metadata("author:%s|title:%s|galleriafi:%s|image-file:%s" % (
        author.encode("utf-8"), title.encode("utf-8"),
        image_path.encode("utf-8"), image_file.encode("utf-8")))
author = author.replace("<", "-").replace(">", "-") position = entryinfo.get('position', None) if position is None: #position_filename = "9%02d" % zero_position position_filename = "-99" else: position_filename = "-%02d" % position if section.lower() in ["misc", "assemblytv", "winter", "seminars"]: position_filename = "" source_file_base = asmmetadata.normalize_key( "%s-%s%s-%s-by-%s" % ( year, section, position_filename, title, author) ) source_file = os.path.join( files_root, year, source_file_base + video_postfix) if not os.path.exists(source_file) and 'video-file' in entryinfo: source_file = os.path.join(files_root, entryinfo['video-file']) if not os.path.exists(source_file) and 'media' in entryinfo and not media_vod_directory is None: source_file = os.path.join(media_vod_directory, entryinfo['media'].lstrip("/")) if not os.path.exists(source_file): print line.encode('utf-8') continue
def create_small_thumbnail_file(args, source_filename, entry): target_aspect = float(args.width)/args.height facedetect_call = [args.face_detector, "--cascade=%s" % args.cascade, source_filename] output = subprocess.check_output(facedetect_call) output = output.strip() if " " not in output: output += " " dimensions, faces_str = output.split(" ", 1) width, height = map(int, dimensions.split("x")) class Face(object): def __init__(self, left, top, face_width, face_height): self.top = int(top) self.left = int(left) self.width = int(face_width) self.height = int(face_height) def __repr__(self): return "(%d;%d)/%dx%d" % (self.left, self.top, self.width, self.height) null_faces = [Face(width / 2, 0, width / 2, 0)] faces = [Face(*face_str.split(",")) for face_str in faces_str.split()] source_aspect = float(width)/height #print target_aspect, source_aspect if source_aspect < target_aspect: faces_sorted = sorted(faces, lambda first, second: first.top < second.top) + null_faces cut_width = width cut_height = int(round(cut_width / target_aspect)) cut_left = 0 highest_face = faces_sorted[0] cut_top = max(0, highest_face.top - highest_face.height * 0.2) if cut_top + cut_height > height: cut_top = height - cut_height cut_top = int(round(cut_top)) else: faces_sorted = sorted(faces, lambda first, second: first.left < second.left) + null_faces cut_height = height cut_width = int(round(height * target_aspect)) cut_top = 0 leftest_face = faces_sorted[0] cut_left = max(0, leftest_face.left - leftest_face.width * 0.2) if cut_left + cut_width > width: cut_left = width - cut_width cut_left = int(round(cut_left)) assert cut_left >= 0 assert cut_top >= 0 assert cut_width <= width assert cut_height <= height print source_filename, faces print width, height, len(faces) print cut_left, cut_top, cut_width, cut_height inphoto = Image.open(source_filename) # Just for testing that where the faces are actually detected. 
# import ImageDraw # draw = ImageDraw.Draw(inphoto) # for face in faces: # draw.rectangle((face.left, face.top, face.left + face.width, face.top + face.height), fill=128) cut_region = (cut_left, cut_top, cut_left + cut_width, cut_top + cut_height) cropped = inphoto.crop(cut_region) scaled = cropped.resize((args.width, args.height), Image.ANTIALIAS) full_title = "%s by %s" % (entry['title'], entry['author']) section_name = asmmetadata.normalize_key(entry['section']['name']) thumbnail_path = os.path.join(args.data_root, "thumbnails", "small", section_name) basename = asmmetadata.normalize_key(full_title) jpeg_file = os.path.join(thumbnail_path, basename + ".jpeg") scaled.save(jpeg_file) png_file = os.path.join(thumbnail_path, basename + ".png") scaled.save(png_file)
    # NOTE(review): this first stretch duplicates the tail of
    # create_small_thumbnail_file above — likely a chunking overlap;
    # confirm against the original file before deduplicating.
    # Crop the computed region and scale it to the requested size.
    cut_region = (cut_left, cut_top, cut_left + cut_width, cut_top + cut_height)
    cropped = inphoto.crop(cut_region)
    scaled = cropped.resize((args.width, args.height), Image.ANTIALIAS)
    full_title = "%s by %s" % (entry['title'], entry['author'])
    section_name = asmmetadata.normalize_key(entry['section']['name'])
    thumbnail_path = os.path.join(args.data_root, "thumbnails", "small", section_name)
    basename = asmmetadata.normalize_key(full_title)
    # Save both JPEG and PNG variants of the thumbnail.
    jpeg_file = os.path.join(thumbnail_path, basename + ".jpeg")
    scaled.save(jpeg_file)
    png_file = os.path.join(thumbnail_path, basename + ".png")
    scaled.save(png_file)

# Driver: walk all photo entries in the metadata file and generate small
# thumbnails for those whose source image exists on disk.
entry_data = asmmetadata.parse_file(args.asmmetadata)
for entry in entry_data.entries:
    section = asmmetadata.normalize_key(
        entry['section']['name'])
    # Only photo sections are processed.
    if "photos" not in section:
        continue
    section_name = asmmetadata.normalize_key(entry['section']['name'])
    file_base = os.path.join(args.data_root, section_name)
    full_title = "%s by %s" % (entry['title'], entry['author'])
    basename = asmmetadata.normalize_key(full_title)
    source_file = os.path.join(file_base, basename + ".jpeg")
    # Skip entries whose source photo has not been downloaded.
    if not os.path.isfile(source_file):
        continue
    create_small_thumbnail_file(args, source_file, entry)