def select_best_candidates(entry, filenames):
    """Return the filenames whose normalized key best matches the entry key.

    Each filename is normalized (numeric prefix removed, suffix removed,
    then ``asmmetadata.normalize_key``) and compared to the entry key with
    the Jaro distance from ``pyjarowinkler``.

    Returns every filename that shares the highest score, in input order,
    or an empty list when the highest score is below 0.75 (which is also
    the case for an empty ``filenames``).
    """
    wanted_key = asmmetadata.get_entry_key(entry)
    scored = []
    best_score = 0.0
    for candidate in filenames:
        stripped = normalize_remove_suffix(
            normalize_remove_numeric_prefix(candidate))
        candidate_key = asmmetadata.normalize_key(stripped)
        score = pyjarowinkler.distance.get_jaro_distance(
            wanted_key, candidate_key)
        best_score = max(score, best_score)
        scored.append((candidate, score))

    # Nothing resembles the entry closely enough.
    if best_score < 0.75:
        return []
    return [candidate for candidate, score in scored if score == best_score]
import os.path
import subprocess
import sys

# Root directory for the downloads, taken from the first command line argument.
target_directory = sys.argv[1]
if not os.path.exists(target_directory):
    print("Target directory %s does not exist!" % target_directory)
    sys.exit(1)

# Entry metadata is parsed from standard input.
entry_data = asmmetadata.parse_file(sys.stdin)

for entry in entry_data.entries:
    # Only entries that carry a 'sceneorg' path are fetched.
    if 'sceneorg' in entry:
        sceneorg_path = entry['sceneorg']
        section_directory = entry['section']['key'].replace("-", "_")
        title_and_author = "%s by %s" % (entry['title'], entry['author'])
        file_directory = asmmetadata.normalize_key(
            title_and_author).replace("-", "_")

        # Layout: <target>/<section>/<title_by_author>/<original file name>
        download_directory = os.path.join(
            target_directory, section_directory, file_directory)
        if not os.path.exists(download_directory):
            os.makedirs(download_directory)
        download_file = os.path.join(
            download_directory, os.path.basename(sceneorg_path))

        download_url = "ftp://ftp.scene.org/pub%s" % sceneorg_path
        subprocess.call(['wget', '-O', download_file, download_url])
def _image_location_and_asset(year, normalized_section, normalized_name,
                              name, image_path):
    """Return ``(location_xml, asset_xml)`` for one image of an entry.

    ``location_xml`` is the ``<location type='image'>`` element and
    ``asset_xml`` is whatever ``display_asset`` returns for the thumbnail
    chosen by ``select_smaller_thumbnail``.
    """
    # splitext only strips the final extension, so paths with additional
    # dots (in a directory or in the file stem) no longer raise ValueError
    # the way unpacking ``image_path.split(".")`` did.
    baseprefix, _ = os.path.splitext(image_path)
    viewfile, postfix = select_smaller_thumbnail(
        os.path.join(FILEROOT, 'thumbnails/large/%s' % baseprefix))

    image_filename = asmmetadata.normalize_key(baseprefix) + postfix
    location = "<location type='image'>%s|%s</location>" % (
        image_filename, escape(name))
    asset = display_asset(
        "%d/%s/%s/%s" % (
            year, normalized_section, normalized_name, image_filename),
        name, viewfile)
    return location, asset


def print_entry(year, entry, description_generator):
    """Print the import XML of one entry to standard output.

    Builds an ``<externalasset>`` element (title, tags, author, description,
    optional ranking and one ``<location>`` child per known media location),
    followed by a thumbnail ``<asset>`` unless the entry uses its parent's
    thumbnail. The result and any extra image assets are printed UTF-8
    encoded.

    Nothing is printed when the entry has neither a 'youtube' id, a 'dtv'
    value nor a recognised image, or when ``get_thumbnail_data`` returns
    None while 'use-parent-thumbnail' is not True.

    Parameters:
        year: year used in asset paths and media URLs (formatted with %d).
        entry: mapping with at least 'title', 'author' and 'section'
            (which provides 'name' and optionally 'ranked').
        description_generator: callable ``(entry, position_str)`` whose
            return value is appended to the description for sections that
            are not Misc/Photos/AssemblyTV/Winter/Seminars.
    """
    title = entry['title']
    author = entry['author']
    section_name = entry['section']['name']
    name = asmmetadata.get_entry_name(entry)

    normalized_name = asmmetadata.get_entry_key(entry)
    normalized_section = asmmetadata.normalize_key(section_name)
    position = entry.get('position', 0)

    extra_assets = ""
    locations = ""

    description = u""
    if 'warning' in entry:
        description += u"%s</p>\n<p>" % cgi.escape(entry['warning'])

    # Position 0 means "no position"; unranked sections never show a place.
    position_str = None
    if entry["section"].get("ranked", True) and position != 0:
        position_str = (
            str(position) + asmmetadata.get_ordinal_suffix(position)
            + " place")

    has_media = False

    display_author = None
    if "Misc" in section_name or "Photos" in section_name:
        pass
    elif "AssemblyTV" not in section_name and "Winter" not in section_name:
        display_author = author
        if "Seminars" not in section_name:
            description += description_generator(entry, position_str)

    if 'description' in entry:
        description += u"%s</p>\n<p>" % cgi.escape(entry['description'])

    if 'platform' in entry:
        description += u"Platform: %s</p>\n<p>" % cgi.escape(entry['platform'])

    if 'techniques' in entry:
        description += u"Notes: %s</p>\n<p>" % cgi.escape(entry['techniques'])

    if display_author is not None:
        description += u"Author: %s\n" % cgi.escape(display_author)

    # Youtube is our primary location
    if "youtube" in entry:
        youtube_id_time = asmmetadata.get_timed_youtube_id(entry)
        has_media = True
        locations += "<location type='youtube'>%s</location>" % youtube_id_time

    # DemosceneTV is an alternative video location.
    demoscenetv = entry.get('dtv')
    if demoscenetv:
        has_media = True
        locations += "<location type='demoscenetv'>%s</location>" % (
            escape(demoscenetv))

    # XXX prevent the creation of humongous files.
    # if 'galleriafi' in entry:
    #     return

    if entry.get('image-file') or entry.get('galleriafi'):
        image_file = entry.get('image-file')
        if image_file is None:
            image_file = "%s/%s.jpeg" % (normalized_section, normalized_name)
        if asmmetadata.is_image(image_file):
            has_media = True
            image_location, image_asset = _image_location_and_asset(
                year, normalized_section, normalized_name, name, image_file)
            locations += image_location
            extra_assets += image_asset

    webfile = entry.get('webfile')
    if webfile:
        if asmmetadata.is_image(webfile):
            has_media = True
            image_location, image_asset = _image_location_and_asset(
                year, normalized_section, normalized_name, name, webfile)
            # The full resolution download is listed before the image itself.
            locations += "<location type='download'>http://media.assembly.org/compo-media/assembly%d/%s|Full resolution</location>" % (year, escape(webfile))
            locations += image_location
            extra_assets += image_asset
        elif webfile.endswith(".mp3"):
            locations += "<location type='download'>http://media.assembly.org/compo-media/assembly%d/%s|MP3</location>" % (year, escape(webfile))

    # NOTE(review): the pouet value and the timed youtube id are inserted
    # as-is; confirm they can never contain XML special characters.
    pouet = entry.get('pouet')
    if pouet:
        locations += "<location type='pouet'>%s</location>" % (pouet)

    download_type = "Original"
    if "game" in section_name.lower():
        download_type = "Playable game"

    download = entry.get('download')
    if download:
        locations += "<location type='download'>%s|%s</location>" % (
            escape(download), download_type)

    sceneorg = entry.get('sceneorg')
    if sceneorg:
        if ";" in sceneorg:
            # Several files: label each one with "(index/total)".
            parts = sceneorg.split(";")
            for i, part in enumerate(parts, 1):
                locations += "<location type='sceneorg'>%s|%s (%d/%d)</location>" % (
                    escape(part), download_type, i, len(parts))
        else:
            locations += "<location type='sceneorg'>%s|%s</location>" % (
                escape(sceneorg), download_type)

    sceneorgvideo = entry.get('sceneorgvideo')
    mediavideo = entry.get('media')
    if sceneorgvideo:
        locations += "<location type='sceneorg'>%s|HQ video</location>" % (
            escape(sceneorgvideo))
    elif mediavideo:
        locations += "<location type='download'>http://media.assembly.org%s|HQ video</location>" % (
            escape(mediavideo))

    galleriafi = entry.get("galleriafi")
    if galleriafi:
        locations += "<location type='download'>http://assembly.galleria.fi%s|Original image</location>" % (
            escape(galleriafi))

    if not has_media:
        return

    # An entry needs either its parent's thumbnail or thumbnail data of its
    # own; otherwise it is skipped.
    use_parent_thumbnail = entry.get('use-parent-thumbnail', False)
    thumbnail_data = None
    if use_parent_thumbnail is not True:
        thumbnail_data = get_thumbnail_data(entry)
        if thumbnail_data is None:
            return

    ranking = ''
    if position != 0:
        ranking = 'ranking="%d"' % position

    tags = set()
    entry_tags = entry.get('tags')
    if entry_tags:
        tags.update(entry_tags.split(" "))

    if use_parent_thumbnail is False:
        thumbnail_asset = """
  <asset path="%(year)s/%(normalizedsection)s/%(normalizedname)s/thumbnail">
    <edition parameters="lang: workflow:public"
         title=%(title)s
         tags="hide-search"
         created="%(current-time)s"
         modified="%(current-time)s"><![CDATA[%(data)s
]]></edition>
  </asset>
""" % {'year': year,
       'normalizedsection': normalized_section,
       'normalizedname': normalized_name,
       'data': base64.encodestring(thumbnail_data),
       'title': quoteattr(title),
       'current-time': CURRENT_TIME,
       }
    else:
        thumbnail_asset = ''

    asset_data = """
  <externalasset path="%(year)s/%(normalizedsection)s/%(normalizedname)s">
    <edition parameters="lang: workflow:public"
         title=%(title)s
         tags=%(tags)s
         created="%(current-time)s"
         modified="%(current-time)s">
      <mediagalleryadditionalinfo
          author=%(author)s
          description=%(description)s
          %(ranking)s></mediagalleryadditionalinfo>
      %(locations)s
    </edition>
  </externalasset>
%(thumbnail)s
""" % {'year': year,
       'normalizedsection': normalized_section,
       'normalizedname': normalized_name,
       'title': quoteattr(title),
       'author': quoteattr(author),
       'ranking': ranking,
       'thumbnail': thumbnail_asset,
       'locations': locations,
       'description': quoteattr(description),
       'current-time': CURRENT_TIME,
       # Sorted so that repeated runs produce identical output.
       'tags': quoteattr(" ".join(sorted(tags))),
       }
    asset_data_str = asset_data.encode("utf-8")
    print(asset_data_str)
    extra_assets_str = extra_assets.encode("utf-8")
    print(extra_assets_str)
    if author == "":
        author = "unknown"

    known_titles = set()
    for image_path, image_data in sorted(files.items(), lambda x, y: cmp(x[0], y[0])):
        image_name = image_path.replace(folder_key, "")
        title = urllib.unquote_plus(image_name)
        next_id = 2
        new_title = title
        while new_title.lower() in known_titles:
            new_title = "%s-%d" % (title, next_id)
            next_id += 1
        title = new_title
        known_titles.add(title.lower())
        filename = asmmetadata.normalize_key(
            "%s by %s" % (title, author)) + ".jpeg"
        print_shell(
            "wget -nc --no-host '%s://%s%s?img=full' -O '%s'/%s" % (
                parsed_url.scheme,
                parsed_url.netloc,
                image_path,
                args.photos_root,
                filename))
        image_file = "%s/%s" % (photo_category, filename)
        print_metadata("author:%s|title:%s|galleriafi:%s|image-file:%s" % (
                author.encode("utf-8"),
                title.encode("utf-8"),
                image_path.encode("utf-8"),
                image_file.encode("utf-8")))
# Example #5
# 0
    author = author.replace("<", "-").replace(">", "-")

    position = entryinfo.get('position', None)
    if position is None:
        #position_filename = "9%02d" % zero_position
        position_filename = "-99"
    else:
        position_filename = "-%02d" % position

    if section.lower() in ["misc", "assemblytv", "winter", "seminars"]:
        position_filename = ""

    source_file_base = asmmetadata.normalize_key(
        "%s-%s%s-%s-by-%s" % (
            year,
            section,
            position_filename,
            title,
            author)
        )
    source_file = os.path.join(
        files_root, year, source_file_base + video_postfix)

    if not os.path.exists(source_file) and 'video-file' in entryinfo:
        source_file = os.path.join(files_root, entryinfo['video-file'])

    if not os.path.exists(source_file) and 'media' in entryinfo and not media_vod_directory is None:
        source_file = os.path.join(media_vod_directory, entryinfo['media'].lstrip("/"))

    if not os.path.exists(source_file):
        print line.encode('utf-8')
        continue
def create_small_thumbnail_file(args, source_filename, entry):
    """Crop a photo around its detected faces and save small thumbnails.

    Runs the external face detector ``args.face_detector`` on
    ``source_filename``, picks a crop with the ``args.width`` x
    ``args.height`` aspect ratio that starts slightly above the topmost
    face (portrait-like sources) or slightly left of the leftmost face
    (landscape-like sources), scales it to ``args.width`` x ``args.height``
    and writes it as both JPEG and PNG under
    ``<args.data_root>/thumbnails/small/<normalized section name>/``.

    Parameters:
        args: object providing ``width``, ``height``, ``face_detector``,
            ``cascade`` and ``data_root``.
        source_filename: path of the image to crop.
        entry: mapping with 'title', 'author' and entry['section']['name'],
            used to name the output files.

    Raises:
        subprocess.CalledProcessError: if the face detector exits non-zero.
    """
    target_aspect = float(args.width) / args.height

    facedetect_call = [
        args.face_detector, "--cascade=%s" % args.cascade, source_filename]
    output = subprocess.check_output(facedetect_call).strip()

    # The output is parsed as "<width>x<height>" followed by zero or more
    # space-separated "left,top,width,height" rectangles. Guarantee a
    # separator so the split below works when no face was reported.
    if " " not in output:
        output += " "

    dimensions, faces_str = output.split(" ", 1)
    width, height = map(int, dimensions.split("x"))

    class Face(object):
        def __init__(self, left, top, face_width, face_height):
            self.top = int(top)
            self.left = int(left)
            self.width = int(face_width)
            self.height = int(face_height)

        def __repr__(self):
            return "(%d;%d)/%dx%d" % (
                self.left, self.top, self.width, self.height)

    # Fallback used when no face was detected.
    null_faces = [Face(width // 2, 0, width // 2, 0)]
    faces = [Face(*face_str.split(",")) for face_str in faces_str.split()]

    source_aspect = float(width) / height

    if source_aspect < target_aspect:
        # Source is too tall: keep the full width and choose the vertical
        # offset from the topmost face. A key function is required here; the
        # previous boolean "cmp" lambda never returned a negative value, so
        # the list was effectively left unsorted.
        faces_sorted = sorted(faces, key=lambda face: face.top) + null_faces
        cut_width = width
        cut_height = int(round(cut_width / target_aspect))
        cut_left = 0
        highest_face = faces_sorted[0]
        cut_top = max(0, highest_face.top - highest_face.height * 0.2)
        if cut_top + cut_height > height:
            cut_top = height - cut_height
        cut_top = int(round(cut_top))
    else:
        # Source is too wide: keep the full height and choose the
        # horizontal offset from the leftmost face.
        faces_sorted = sorted(faces, key=lambda face: face.left) + null_faces
        cut_height = height
        cut_width = int(round(height * target_aspect))
        cut_top = 0
        leftest_face = faces_sorted[0]
        cut_left = max(0, leftest_face.left - leftest_face.width * 0.2)
        if cut_left + cut_width > width:
            cut_left = width - cut_width
        cut_left = int(round(cut_left))

    assert cut_left >= 0
    assert cut_top >= 0
    assert cut_width <= width
    assert cut_height <= height

    print("%s %s" % (source_filename, faces))
    print("%s %s %s" % (width, height, len(faces)))
    print("%s %s %s %s" % (cut_left, cut_top, cut_width, cut_height))

    inphoto = Image.open(source_filename)

    # Just for testing that where the faces are actually detected.
    # import ImageDraw
    # draw = ImageDraw.Draw(inphoto)
    # for face in faces:
    #     draw.rectangle((face.left, face.top, face.left + face.width, face.top + face.height), fill=128)

    cut_region = (cut_left, cut_top, cut_left + cut_width, cut_top + cut_height)
    cropped = inphoto.crop(cut_region)
    scaled = cropped.resize((args.width, args.height), Image.ANTIALIAS)

    full_title = "%s by %s" % (entry['title'], entry['author'])

    section_name = asmmetadata.normalize_key(entry['section']['name'])
    thumbnail_path = os.path.join(
        args.data_root, "thumbnails", "small", section_name)
    basename = asmmetadata.normalize_key(full_title)

    # The crop/scale/save sequence used to be repeated twice with identical
    # inputs; writing each file once yields the same result.
    jpeg_file = os.path.join(thumbnail_path, basename + ".jpeg")
    scaled.save(jpeg_file)
    png_file = os.path.join(thumbnail_path, basename + ".png")
    scaled.save(png_file)

# Create small thumbnails for every entry in a "photos" section whose
# source JPEG exists under args.data_root.
entry_data = asmmetadata.parse_file(args.asmmetadata)
for entry in entry_data.entries:
    section = asmmetadata.normalize_key(entry['section']['name'])
    if "photos" in section:
        section_name = asmmetadata.normalize_key(entry['section']['name'])
        file_base = os.path.join(args.data_root, section_name)
        full_title = "%s by %s" % (entry['title'], entry['author'])
        basename = asmmetadata.normalize_key(full_title)
        source_file = os.path.join(file_base, basename + ".jpeg")

        # Entries without a source image on disk are silently skipped.
        if os.path.isfile(source_file):
            create_small_thumbnail_file(args, source_file, entry)