from functools import partial import os from lxml import html, cssselect from sync_media_function import sync_media from metafuncs import branch, combine, maybe, tryit, getitem, cache, Pipe # Support functions is_not_none = lambda obj: obj is not None unique = lambda sequence: list(set(sequence)) get_html = html.parse # :: str -> ElementTree img_tags = cssselect.CSSSelector('img') # :: ElementTree -> List[Element] get_src = maybe(getitem('src'), # normal maybe(getitem('data-src'), # photo pages maybe(getitem('data-srcset')))) # home page BACKGROUND_IMAGE_REGEX = re.compile(r'background-image: url\((.*?\))') read_page = lambda url: urllib2.urlopen(url).read() path_to_url = lambda path: "file://"+os.path.abspath(path) PATH_SPLIT_REGEX = re.compile(r'^(/media)/(.*)/(.*?\..*?)$') # PATH_SPLIT_REGEX = re.compile(r'/media/(.*)/(') split_path_parts = lambda src: PATH_SPLIT_REGEX.match(src).groups() # :: str -> PathPats MEDIA_PATH_REGEX = re.compile(r'/media/') is_media_path = lambda path: MEDIA_PATH_REGEX.search(path) is not None fixup_cdn = lambda url: "http:"+url if url.startswith('//cdn') else url def crop(prefix): def wrapped(word):
from sync_media_function import sync_media from metafuncs import branch, combine, maybe, tryit, getitem, cache # Composable = Pipe from monad.composable import Composable import functools F = functools.partial # Support functions is_not_none = lambda obj: obj is not None unique = lambda sequence: list(set(sequence)) get_html = html.parse # :: str -> ElementTree img_tags = cssselect.CSSSelector('img') # :: ElementTree -> List[Element] get_src = maybe(getitem('src'), # normal maybe(getitem('data-src'), # photo pages maybe(getitem('data-srcset')))) # home page BACKGROUND_IMAGE_REGEX = re.compile(r'background-image: url\((.*?\))') read_page = lambda url: urllib2.urlopen(url).read() PATH_SPLIT_REGEX = re.compile(r'^(/media)/(.*)/(.*?\..*?)$') # PATH_SPLIT_REGEX = re.compile(r'/media/(.*)/(') split_path_parts = lambda src: PATH_SPLIT_REGEX.match(src).groups() # :: str -> PathPats MEDIA_PATH_REGEX = re.compile(r'/media/') is_media_path = lambda path: MEDIA_PATH_REGEX.search(path) is not None # Composite work-horse functions # Retreive src-like properties from <img> tags get_img_srcs = ( Composable() # :: Location >> cache(get_html) # :: ElementTree >> img_tags # :: List[Element]