def __init__(self, title=None, source_id=None, lang="en"):
    """Initialise the file node and derive its name and absolute source URL.

    The parent initialiser receives ``title``, ``source_id`` and ``lang``
    unchanged. ``source_id`` is then used to compute ``filename``; when it
    starts with "/" the ``source_id`` attribute is re-resolved against
    ``FolkDCChef.BASE_URL``.

    NOTE(review): although ``source_id`` defaults to ``None``, the body
    calls ``source_id.startswith`` and therefore needs a string.

    Args:
        title: forwarded to the parent initialiser.
        source_id: URL or site-relative path of the file.
        lang: language code forwarded to the parent initialiser.
    """
    super(File, self).__init__(title=title, source_id=source_id, lang=lang)
    self.filename = get_name_from_url(source_id)

    # Site-relative paths are made absolute; anything else is kept as the
    # parent initialiser stored it.
    if source_id.startswith("/"):
        resolved_source = urljoin(FolkDCChef.BASE_URL, self.source_id)
    else:
        resolved_source = self.source_id
    self.source_id = resolved_source

    self.filepath = None
    self.name = get_name_from_url_no_ext(self.filename)
def __init__(self, source_id, lang="en", name=None):
    """Record where the file comes from and build its display name.

    Args:
        source_id: URL of the file; a value starting with "/" is resolved
            against ``BASE_URL``.
        lang: language code stored on the instance.
        name: prefix joined to the file name with an underscore to form
            ``self.name`` (the default ``None`` is formatted as "None").
    """
    self.filename = get_name_from_url(source_id)

    # Only site-relative paths need to be joined with the base URL.
    if source_id.startswith("/"):
        self.source_id = urljoin(BASE_URL, source_id)
    else:
        self.source_id = source_id

    self.filepath = None
    self.lang = lang

    name_parts = (name, self.filename)
    self.name = "{}_{}".format(*name_parts)
def __init__(self, source_id, lang="en", lincese="", name=None):
    """Record the file's origin, display name and licence.

    Args:
        source_id: URL of the file; a value starting with "/" is resolved
            against ``BASE_URL``.
        lang: language code stored on the instance.
        lincese: accepted but not used by this initialiser; the licence is
            always built from ``licenses.CC_BY_NC_SA``. The (misspelled)
            parameter name is kept for caller compatibility.
        name: prefix joined to the file name with an underscore to form
            ``self.name`` (the default ``None`` is formatted as "None").
    """
    self.filename = get_name_from_url(source_id)

    # Only site-relative paths need to be joined with the base URL.
    if source_id.startswith("/"):
        self.source_id = urljoin(BASE_URL, source_id)
    else:
        self.source_id = source_id

    self.filepath = None
    self.lang = lang

    name_parts = (name, self.filename)
    self.name = "{}_{}".format(*name_parts)

    license_obj = get_license(
        licenses.CC_BY_NC_SA, copyright_holder=COPYRIGHT_HOLDER
    )
    self.license = license_obj.as_dict()
def get_images(self, content):
    """Point every ``<img>`` in *content* at a local file name.

    Each ``<img>`` tag with a non-empty ``src`` has that attribute replaced
    by the name ``get_name_from_url`` returns for its URL, and the
    ``URL -> name`` pair is recorded in ``self.images``. A ``src`` starting
    with "/" is resolved against ``BASE_URL`` first.

    Tags without a ``src`` attribute, or with an empty one, are skipped
    instead of raising ``KeyError``. A tag whose URL is already recorded is
    still rewritten, so repeated images do not keep their remote URL.

    Args:
        content: parsed markup whose ``findAll("img")`` yields tags with
            dict-style attribute access (presumably a BeautifulSoup node;
            verify against the caller).
    """
    for img in content.findAll("img"):
        img_src = img.get("src")
        if not img_src:
            continue
        if img_src.startswith("/"):
            img_src = urljoin(BASE_URL, img_src)
        filename = get_name_from_url(img_src)
        # setdefault keeps the first recorded name for a URL and returns
        # it, so every tag for that URL ends up with the same local name.
        img["src"] = self.images.setdefault(img_src, filename)
def to_local_images(self, content):
    """Rewrite ``<img>`` sources in *content* to local names and map them.

    Each ``<img>`` tag with a non-empty ``src`` has that attribute replaced
    by the name ``get_name_from_url`` returns for its URL. A ``src``
    starting with "/" is resolved against ``FolkDCChef.BASE_URL`` first.
    Tags with a missing or empty ``src`` are left untouched.

    Every tag for a given URL is rewritten, not only the first one found,
    so an image used several times never keeps its remote URL.

    Args:
        content: parsed markup whose ``find_all("img")`` yields tags with
            dict-style attribute access (presumably a BeautifulSoup node;
            verify against the caller).

    Returns:
        dict: the image URL of each rewritten tag mapped to the local
        file name written into that tag.
    """
    images_urls = {}
    for img in content.find_all("img"):
        img_src = img.get("src")
        if not img_src:
            continue
        if img_src.startswith("/"):
            img_src = urljoin(FolkDCChef.BASE_URL, img_src)
        filename = get_name_from_url(img_src)
        # setdefault records the URL once and returns the stored name, so
        # duplicate tags are rewritten to the same local name.
        img["src"] = images_urls.setdefault(img_src, filename)
    return images_urls
def __init__(self, title=None, source_id=None, lang="ar"):
    """Initialise the file node and remember its file name and language.

    Args:
        title: forwarded to the parent initialiser.
        source_id: URL of the file; forwarded to the parent initialiser
            and passed to ``get_name_from_url`` to compute ``filename``.
        lang: language code, forwarded to the parent initialiser and also
            stored on the instance.
    """
    parent_kwargs = dict(title=title, source_id=source_id, lang=lang)
    super(File, self).__init__(**parent_kwargs)

    url_name = get_name_from_url(source_id)
    self.filename = url_name
    self.filepath = None
    self.lang = lang