import feedparser import logging import lxml.html from datetime import datetime import re from lxml.html.clean import Cleaner from fancyashow.extensions import ExtensionLibrary, ShowParser from fancyashow.extensions.models import Venue, Performer, Show from fancyashow.util import parsing as html_util from fancyashow.util import dates as date_util from fancyashow.util import lang as lang_util extensions = ExtensionLibrary() class Glasslands(ShowParser): BASE_URL = "http://www.glasslands.com/" FEED_URL = "http://glasslands.blogspot.com/feeds/posts/default" SHOW_DIVIDER_RE = re.compile('___+') HEADER_SEP_RE = re.compile('(?:(?:>)|/){2}') BODY_SKIP = [ re.compile('rsvp', re.I), re.compile('tickets', re.I) ] REPLACEMENTS = [ (re.compile('<\s*br\s*/?\s*>', re.I), '\n'), (re.compile('<\s*div\s*.*?>', re.I), ''), (re.compile('<\s*/\s*div\s*>', re.I), '\n'), (re.compile('details\s+tba', re.I), '') ] def __init__(self, *args, **kwargs):
import logging import re from fancyashow.extensions import ExtensionLibrary, ShowParser from fancyashow.extensions.models import Venue, Performer, Show from fancyashow.util import parsing as html_util from fancyashow.util import dates as date_util from fancyashow.util import lang as lang_util extensions = ExtensionLibrary() class KnittingFactory(ShowParser): BASE_URL = "http://bk.knittingfactory.com/" CALENDAR_URL = "http://bk.knittingfactory.com/calendar/" IS_EVENT = re.compile("http://bk.knittingfactory.com/event-details/\?tfly_event_id=\d+") def __init__(self, *args, **kwargs): super(KnittingFactory, self).__init__(*args, **kwargs) self._parser = None def next(self): if not self._parser: self._parser = self._get_parser() while(True): return self._parser.next() raise StopIteration def _get_parser(self): show_urls = html_util.get_show_urls(self.CALENDAR_URL, ".tfly-calendar", 'td', self.IS_EVENT)
# coding=UTF-8
import logging
import re

from fancyashow.extensions import ExtensionLibrary, ShowParser
from fancyashow.extensions.models import Venue, Performer, Show
from fancyashow.util import parsing as html_util
from fancyashow.util import dates as date_util
from fancyashow.util import lang as lang_util

logger = logging.getLogger(__name__)

extensions = ExtensionLibrary()


class Pianos(ShowParser):
    """ShowParser subclass targeting the pianosnyc.com calendars.

    The underlying parser object is created lazily on the first call to
    ``next()``.
    """

    BASE_URL = "http://www.pianosnyc.com/"
    CALENDAR_URLS = ['http://www.pianosnyc.com/showroom', 'http://www.pianosnyc.com/upstairs']
    # Matches event URLs under either calendar section.
    IS_EVENT = re.compile("http://www.pianosnyc.com/(upstairs|showroom)/[^/]+-\d+")
    # Matches text shaped like "<word>, <word> <digits>, <digits>".
    DATE_RE = re.compile('(\w+,\s+\w+\s+\d+,\s+\d+)')

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ShowParser and start without a parser."""
        super(Pianos, self).__init__(*args, **kwargs)
        self._parser = None

    def next(self):
        """Return the next item produced by the lazily built parser.

        The parser comes from ``self._get_parser()``, which is not part of
        the visible code. Whatever ``self._parser.next()`` raises propagates
        to the caller unchanged.
        """
        if not self._parser:
            self._parser = self._get_parser()

        return self._parser.next()
import logging from datetime import datetime from fancyashow.extensions import ExtensionLibrary, ResourceExtractor, ShowResourceHandler from fancyashow.extensions import ArtistResourceHandler, ArtistMediaExtractor from fancyashow.extensions.models import VideoInfo from fancyashow.util.resources import URLMatch, HrefMatcher, ParamMatcher from fancyashow.util import lang, artist_matcher from fancyashow.systems.vimeo import api logger = logging.getLogger(__name__) extensions = ExtensionLibrary() HREF_URL = URLMatch('(www\.)?vimeo.com/(?P<video_id>\d+)\/?') EMBED_URL = URLMatch('(www\.)?vimeo.com/.*?clip_id=(?P<video_id>\d+)') class VimeoResourceExtractor(ResourceExtractor): def resources(self, node): def url(m): return 'vimeo-video:%s' % m.group('video_id') ret = [] ret.extend([url(m) for m in HrefMatcher( node, HREF_URL)]) ret.extend([url(m) for m in ParamMatcher(node, 'movie', EMBED_URL)]) return ret def _trans_video(video, source_id):
import re
import logging
from datetime import datetime, timedelta

from fancyashow.extensions import ExtensionLibrary, ShowParser
from fancyashow.extensions.models import Venue, Performer, Show
from fancyashow.util import parsing as html_util
from fancyashow.util import dates as date_util
from fancyashow.util import lang as lang_util

extensions = ExtensionLibrary()

logger = logging.getLogger(__name__)


class BellHouse(ShowParser):
    """ShowParser subclass targeting the thebellhouseny.com calendar.

    The underlying parser object is created lazily on the first call to
    ``next()``.
    """

    BASE_URL = "http://www.thebellhouseny.com/"
    CALENDAR_URL = "http://www.thebellhouseny.com/calendar.php"
    # Matches "<word> <month>/<day>" and exposes the two numbers as groups.
    DATE_RE = re.compile(r"\w+ (?P<month>\d+)/(?P<day>\d+)")
    # Matches a clock time such as "8pm", "8:30 pm", "8 a.m." or "8 p.m.".
    # The last alternative used to be written ``p\.\m``: ``\m`` is not a
    # valid regex escape (a literal "m" on Python 2, an error on newer
    # interpreters) and the trailing period of "p.m." was never captured.
    # The period is optional so input accepted before is still accepted.
    TIME_RE = re.compile(
        r'\s+(?P<time>\d+(?:\s*:\s*\d+)?\s*(?:am|pm|a\.m\.|p\.m\.?))\s*',
        re.IGNORECASE)

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ShowParser and start without a parser."""
        super(BellHouse, self).__init__(*args, **kwargs)
        self._parser = None

    def next(self):
        """Return the next item produced by the lazily built parser.

        On the first call the current time is stored in
        ``self._parser_started`` and the parser is obtained from
        ``self._get_parser()`` (not part of the visible code). Whatever
        ``self._parser.next()`` raises propagates to the caller unchanged.
        """
        if not self._parser:
            self._parser_started = datetime.now()
            self._parser = self._get_parser()

        # The original wrapped this in ``while(True)``; the loop body
        # returned on its first pass, so a plain return is equivalent.
        return self._parser.next()
import logging
import lxml.html
import re
from lxml.html.clean import Cleaner
from datetime import datetime

from fancyashow.extensions import ExtensionLibrary, ShowParser
from fancyashow.extensions.models import Venue, Performer, Show
from fancyashow.util import parsing as html_util
from fancyashow.util import dates as date_util
from fancyashow.util import lang as lang_util

logger = logging.getLogger(__name__)

extensions = ExtensionLibrary()


class BruarFalls(ShowParser):
    """ShowParser subclass targeting bruarfalls.com.

    The underlying parser object is created lazily on the first call to
    ``next()``.
    """

    BASE_URL = "http://www.bruarfalls.com/"
    # Matches a line-leading ordinal followed by a colon, e.g. "21st:".
    NUM_RE = re.compile('^\s*\d+\s*(?:st|nd|rd|th):\s*', re.IGNORECASE | re.MULTILINE)

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ShowParser and start without a parser."""
        super(BruarFalls, self).__init__(*args, **kwargs)
        self._parser = None

    def next(self):
        """Return the next item produced by the lazily built parser.

        On the first call the current time is stored in
        ``self._parse_started`` and the parser is obtained from
        ``self._get_parser()`` (not part of the visible code). Whatever
        ``self._parser.next()`` raises propagates to the caller unchanged.
        """
        if not self._parser:
            self._parse_started = datetime.now()
            self._parser = self._get_parser()

        return self._parser.next()
from fancyashow.extensions import ExtensionLibrary
from fancyashow.parsers.common.lastfm import LastFMParser
from fancyashow.extensions.models import Venue

extensions = ExtensionLibrary()


class IrvingPlaza(LastFMParser):
    """LastFMParser configuration for Irving Plaza."""

    def venue_id(self):
        """Return the numeric venue id (presumably the Last.fm id; verify)."""
        return 8778815

    def venue(self):
        """Return a Venue built from the display name and homepage URL."""
        return Venue('Irving Plaza', 'http://www.irvingplaza.com/')

    @classmethod
    def id(cls):
        """Return the dotted identifier string for this parser."""
        return 'us.ny.manhattan.irving-plaza'


# Register the parser with this module's extension library.
extensions.register_show_parser(IrvingPlaza)
import logging
from copy import deepcopy
from datetime import datetime, timedelta

from fancyashow.extensions import ExtensionLibrary
from fancyashow.extensions.artists import ArtistProcessor
from fancyashow.processing import ProcessorSetup
from fancyashow.db.models import SystemStat

logger = logging.getLogger(__name__)

extensions = ExtensionLibrary()


class ArtistRanking(ArtistProcessor):
    """Artist processor that works over a sampling window of recent days.

    The window length comes from the required ``sample_days`` setting.
    """

    def __init__(self, library, settings):
        """Read settings and compute the sampling window.

        ``sample_end`` is one day after the moment of construction.
        ``sample_start`` is midnight ``sample_days`` days before it.
        """
        super(ArtistRanking, self).__init__(library, settings)

        self.sample_days = self.get_required_setting(settings, 'sample_days')
        self._stats = None

        # Read the clock once so both ends of the window derive from the
        # same instant.
        now = datetime.today()

        self.sample_end = now + timedelta(days=1)
        # Clear microseconds as well, so the start is exactly midnight
        # rather than midnight plus a sub-second remainder.
        self.sample_start = (now - timedelta(days=self.sample_days)).replace(
            hour=0, minute=0, second=0, microsecond=0)

    def stats(self):
        """Return a dict mapping ``system_id`` to its SystemStat.

        The dict is built from ``SystemStat.objects()`` on first use and
        cached on the instance.
        """
        # Compare against None: an empty result is a valid cached value and
        # must not trigger a fresh query on every call.
        if self._stats is None:
            by_system = {}

            for stat in SystemStat.objects():
                by_system[stat.system_id] = stat

            self._stats = by_system

        return self._stats
import logging from datetime import datetime import urlparse import re from fancyashow.extensions import ExtensionLibrary, ShowParser from fancyashow.extensions.models import Venue, Performer, Show from fancyashow.util import parsing as html_util from fancyashow.util import dates as date_util from fancyashow.util import lang as lang_util extensions = ExtensionLibrary() class Coco66(ShowParser): BASE_URL = "http://www.coco66.com/" MONTH_FORMAT = 'http://coco66.com/?page=calendar&month=%(month)s&year=%(year)s' IS_EVENT = re.compile("http://(?:www.)?coco66.com/\?page=event&id=\d+") EVENT_ID = re.compile('id=(\d+)') # Allows for 2009-09-01, Monday, October 11th, 2010, or Tonight HEADER_PARSE = re.compile('(?P<date>(?:\d+-\d+-\d+)|(?:\w+\s+\d+/\d+)|(\w+,\s+\w+\s+\d+\w+,\s+\d+)|(?:tonight))\s*-\s*(?P<title>.*)', re.I) MONTHS_AHEAD = 3 def __init__(self, *args, **kwargs): super(Coco66, self).__init__(*args, **kwargs) self._current_parser = None self._date_queue = self._request_dates() self._parsed_dates = { } def _next_parser(self): if self._date_queue: return self._month_parser(self._date_queue.pop(0))
import logging
import urlparse
import re
from datetime import datetime

from fancyashow.extensions import ExtensionLibrary, ShowParser
from fancyashow.extensions.models import Venue, Performer, Show
from fancyashow.util import parsing as html_util
from fancyashow.util import dates as date_util
from fancyashow.util import lang as lang_util

extensions = ExtensionLibrary()


class LPR(ShowParser):
    """ShowParser subclass targeting the lepoissonrouge.com calendar.

    Dates to request are queued at construction time and consumed one at a
    time by ``_next_parser()``.
    """

    BASE_URL = "http://lepoissonrouge.com/"
    # Template filled with ``year`` and ``month`` mapping keys.
    CALENDAR_URL = "http://lepoissonrouge.com/calendar/%(year)s/%(month)s/"
    IS_EVENT_URL_RE = re.compile('http://lepoissonrouge.com/events/view/\d+')
    IS_ARTIST_URL_RE = re.compile('http://lepoissonrouge.com/events/artist/\d+')
    MONTHS_AHEAD = 3

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ShowParser and build the date queue.

        ``self._request_dates()`` is not part of the visible code.
        """
        super(LPR, self).__init__(*args, **kwargs)

        self._current_parser = None
        self._date_queue = self._request_dates()

    def _next_parser(self):
        """Return a parser for the next queued date, or None when empty.

        The date is removed from the front of the queue and handed to
        ``self._month_parser()`` (not part of the visible code).
        """
        if not self._date_queue:
            return None

        upcoming = self._date_queue.pop(0)
        return self._month_parser(upcoming)
import logging
import re
import urllib2
import lxml.html
from lxml import etree

from fancyashow.extensions import ExtensionLibrary, ResourceExtractor, ShowResourceHandler, ArtistResourceHandler
from fancyashow.util.resources import URLMatch, HrefMatcher, TextMatcher
from fancyashow.db.models import ArtistProfile
from fancyashow.util import artist_matcher, parsing

logger = logging.getLogger(__name__)
extensions = ExtensionLibrary()

# Named group capturing the profile segment of a Twitter URL.
PROFILE_ID = "(?P<profile_id>[\d\w_.-]+)"
NAME_RE = re.compile('\s*(.+)(?:\s+\(.+?\)\s+on twitter.*)', re.I | re.M)
TWITTER_URL = URLMatch('(?:www\.)?twitter\.com/%s' % PROFILE_ID)


class TwitterResourceExtractor(ResourceExtractor):
    """Extracts ``twitter-profile:<id>`` resource URIs from a node."""

    def resources(self, node):
        """Return resource URIs for every Twitter URL found in ``node``.

        Matches from HrefMatcher come first, followed by matches from
        TextMatcher; duplicates are not removed.
        """
        def uri(match):
            # Use the parameter itself. The original body read ``m``, which
            # only resolved because Python 2 list comprehensions leak their
            # loop variable into the enclosing function scope.
            return 'twitter-profile:%s' % match.group('profile_id')

        ret = []

        ret.extend([uri(m) for m in HrefMatcher(node, TWITTER_URL)])
        ret.extend([uri(m) for m in TextMatcher(node, TWITTER_URL)])

        return ret
import logging from datetime import datetime from fancyashow.extensions import ExtensionLibrary, ArtistMediaExtractor, ResourceExtractor from fancyashow.extensions import ShowResourceHandler, ArtistResourceHandler from fancyashow.extensions.models import VideoInfo from fancyashow.db.models import ArtistProfile from fancyashow.util.resources import URLMatch, HrefMatcher, ParamMatcher, TextMatcher from fancyashow.util import artist_matcher from bandcamp import BandCampService from fancyashow.systems.bandcamp.settings import API_KEY logger = logging.getLogger(__name__) extensions = ExtensionLibrary() PROFILE_ID = "(?P<profile_id>[\d\w_.-]+)" HREF_URL = URLMatch('%s.bandcamp.com' % PROFILE_ID) SYSTEM_ID = 'bandcamp' class BandCampResourceExtractor(ResourceExtractor): def resources(self, node): def url(m): return 'bandcamp-profile:%s' % m.group('profile_id') ret = [] ret.extend([url(m) for m in HrefMatcher( node, HREF_URL)]) ret.extend([url(m) for m in TextMatcher( node, HREF_URL)])
import logging from datetime import datetime from fancyashow.extensions import ArtistProcessor, ResourceExtractorManager from fancyashow.extensions import ExtensionLibrary logger = logging.getLogger(__name__) extensions = ExtensionLibrary() # State form # $system_id:$profile_id -> { # last_parsed: $timestamp, # resources: [] # } class ProfileParser(ArtistProcessor): def __init__(self, library, settings): super(ProfileParser, self).__init__(library, settings) self.update_interval = self.get_required_setting(settings, "update_interval") self.resource_extractor = ResourceExtractorManager(library.resource_extractors()) @classmethod def default_state(self): return {} def process(self, artist, state, dependent_states): new_state = self.clone_state(state) media = []
import logging from datetime import date from fancyashow.parsers.common.google_calendar import GoogleCalendarParser from fancyashow.extensions import ExtensionLibrary, ShowParser from fancyashow.extensions.models import Venue, Performer, Show from fancyashow.util import parsing as html_util from fancyashow.util import dates as date_util from fancyashow.util import lang as lang_util extensions = ExtensionLibrary() logger = logging.getLogger(__name__) class SpikeHill(GoogleCalendarParser): def _process_recurring_entries(self, entries): return [] def _process_entry_group(self, start_date, entries): show = Show() show.venue = self.venue() show.performers = [] entries.sort(key = lambda e: e.when[0].start_time, reverse = True) for entry in entries: logger.debug("Processing entry: %s, starting on: %s" % (entry.title.text, entry.when[0].start_time)) # Full day events usually denote a title which we currently will simply skip if 'T' not in entry.when[0].start_time: logger.debug('Entry "%s" is an all day event, skipping' % entry.title.text)
import logging
import urlparse
import re

from fancyashow.extensions import ExtensionLibrary, ShowParser
from fancyashow.extensions.models import Venue, Performer, Show
from fancyashow.util import parsing as html_util
from fancyashow.util import dates as date_util
from fancyashow.util import lang as lang_util

extensions = ExtensionLibrary()


class BoweryPresentsBase(ShowParser):
    """ShowParser base whose parser object is created lazily in ``next()``.

    The URL constants below point at brooklynbowl.com.
    """

    BASE_URL = "http://www.brooklynbowl.com/"
    CALENDAR_URL = "http://www.brooklynbowl.com/upcoming-events/"
    # Pulls the path out of an inline "background-image: url(...)" style.
    IMAGE_RE = re.compile('background-image:\s*url\((?P<image_path>.+)\)')

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ShowParser and start without a parser."""
        super(BoweryPresentsBase, self).__init__(*args, **kwargs)
        self._parser = None

    def next(self):
        """Return the next item produced by the lazily built parser.

        The parser comes from ``self._get_parser()``, which is not part of
        the visible code. Whatever ``self._parser.next()`` raises propagates
        to the caller unchanged.
        """
        if not self._parser:
            self._parser = self._get_parser()

        return self._parser.next()
import logging
from datetime import datetime

from fancyashow.extensions import ExtensionLibrary, ArtistMediaExtractor, ResourceExtractor
from fancyashow.extensions import ShowResourceHandler, ArtistResourceHandler
from fancyashow.extensions.models import VideoInfo
from fancyashow.util.resources import URLMatch, HrefMatcher, ParamMatcher, TagAttrMatcher
from fancyashow.util import artist_matcher
from fancyashow.systems.youtube import api

logger = logging.getLogger(__name__)
extensions = ExtensionLibrary()

# Watch-page links. Two fixes over the original pattern:
#  * the "?" after "watch" is escaped, so it matches the query separator
#    instead of making the "h" optional;
#  * the "+" sits inside the ``video_id`` group, so the group captures the
#    whole id rather than only its last character.
# NOTE(review): assumes URLMatch treats its argument as a regular expression
# (the named groups suggest so) - confirm against URLMatch.
HREF_URL = URLMatch('(www\.)?youtube.com/watch\?(.+&)?v=(?P<video_id>[^&]+)')
EMBED_URL = URLMatch('(www\.)?youtube.com/v/(?P<video_id>[^&]+)')
NEW_EMBED_URL = URLMatch('(www\.)?youtube.com/embed/(?P<video_id>[^&]+)')


class YouTubeResourceExtractor(ResourceExtractor):
    """Extracts ``youtube-video:<id>`` resource URIs from a node."""

    def resources(self, node):
        """Return resource URIs for every YouTube video found in ``node``.

        Three sources are scanned, in order: link hrefs, ``movie`` params
        and ``iframe`` ``src`` attributes. Duplicates are not removed.
        """
        def url(m):
            return 'youtube-video:%s' % m.group('video_id')

        ret = []

        ret.extend([url(m) for m in HrefMatcher(node, HREF_URL)])
        ret.extend([url(m) for m in ParamMatcher(node, 'movie', EMBED_URL)])
        ret.extend([url(m) for m in TagAttrMatcher(node, ['iframe'], ['src'], NEW_EMBED_URL)])

        return ret
from datetime import datetime from fancyashow.extensions import ExtensionLibrary, ShowParser from fancyashow.extensions.models import Venue, Performer, Show from fancyashow.util import parsing as html_util from fancyashow.util import dates as date_util from fancyashow.util import lang as lang_util extensions = ExtensionLibrary() class UnionPool(ShowParser): BASE_URL = "http://unionpool.blogspot.com/" def __init__(self, *args, **kwargs): super(UnionPool, self).__init__(*args, **kwargs) self._parser = None def next(self): if not self._parser: self._parse_started = datetime.now() self._parser = self._get_parser() while(True): return self._parser.next() raise StopIteration def _get_parser(self): doc = html_util.fetch_and_parse(self.BASE_URL) sidebar = doc.get_element_by_id("sidebar-right-1")
import logging
import urllib2
import lxml.html
import re
from datetime import datetime

from fancyashow.extensions import ExtensionLibrary, ShowParser
from fancyashow.extensions.models import Venue, Performer, Show
from fancyashow.util import parsing as html_util
from fancyashow.util import dates as date_util
from fancyashow.util import lang as lang_util

extensions = ExtensionLibrary()


class Europa(ShowParser):
    """ShowParser subclass targeting the europalive.net calendar.

    The underlying parser object is created lazily on the first call to
    ``next()``.
    """

    BASE_URL = "http://www.europalive.net/"
    CALENDAR_URL = "http://www.europalive.net/calendar.html"
    # Matches paths that contain the band image directory.
    IMAGE_RE = re.compile('/images/band_images/')

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ShowParser and start without a parser."""
        super(Europa, self).__init__(*args, **kwargs)
        self._parser = None

    def next(self):
        """Return the next item produced by the lazily built parser.

        On the first call the current time is stored in
        ``self._parse_started`` and the parser is obtained from
        ``self._get_parser()`` (not part of the visible code). Whatever
        ``self._parser.next()`` raises propagates to the caller unchanged.
        """
        if not self._parser:
            self._parse_started = datetime.now()
            self._parser = self._get_parser()

        return self._parser.next()
from fancyashow.extensions import ExtensionLibrary
from fancyashow.parsers.common.lastfm import LastFMParser
from fancyashow.extensions.models import Venue

extensions = ExtensionLibrary()


class RadioCityMusicHall(LastFMParser):
    """LastFMParser configuration for Radio City Music Hall."""

    def venue_id(self):
        """Return the numeric venue id (presumably the Last.fm id; verify)."""
        return 8906841

    def venue(self):
        """Return a Venue built from the display name and homepage URL."""
        return Venue('Radio City Music Hall', 'http://www.radiocity.com/')

    @classmethod
    def id(cls):
        """Return the dotted identifier string for this parser."""
        return 'us.ny.manhattan.radio-city-music-hall'


# Register the parser with this module's extension library.
extensions.register_show_parser(RadioCityMusicHall)
import re from datetime import datetime from fancyashow.extensions import ExtensionLibrary, ShowParser from fancyashow.extensions.models import Venue, Performer, Show from fancyashow.util import parsing as html_util from fancyashow.util import dates as date_util from fancyashow.util import lang as lang_util extensions = ExtensionLibrary() class Southpaw(ShowParser): BASE_URL = "http://spsounds.com/" CALENDAR_URL = "http://spsounds.com/calendar/" IS_EVENT = re.compile("http://www.santospartyhouse.com/index/event/id/\d+") def __init__(self, *args, **kwargs): super(Southpaw, self).__init__(*args, **kwargs) self._parser = None def next(self): if not self._parser: self._parse_started = datetime.now() self._parser = self._get_parser() while(True): return self._parser.next() raise StopIteration def _get_parser(self):
import logging
from datetime import datetime

from fancyashow.extensions import ExtensionLibrary, ArtistMediaExtractor, ResourceExtractor
from fancyashow.extensions import ShowResourceHandler, ArtistResourceHandler
from fancyashow.extensions.models import VideoInfo
from fancyashow.util.resources import URLMatch, HrefMatcher, ParamMatcher, TextMatcher
from fancyashow.util import artist_matcher

logger = logging.getLogger(__name__)
extensions = ExtensionLibrary()

# Named group capturing the profile segment of a SoundCloud URL.
PROFILE_ID = "(?P<profile_id>[\d\w_.-]+)"
HREF_URL = URLMatch('(www\.)?soundcloud.com/%s' % PROFILE_ID)
# Flash-player embed URL carrying a percent-encoded soundcloud.com link.
# The "?" after "player.swf" is escaped so it matches the query separator;
# unescaped it made the "f" optional and the pattern could never match
# "player.swf?url=...". The dot in "player.swf" is escaped as well.
# NOTE(review): assumes URLMatch treats its argument as a regular expression
# (the named groups suggest so) - confirm against URLMatch.
EMBED_URL = URLMatch('player.soundcloud.com/player\.swf\?url=http[s]?%3A%2F%2Fsoundcloud.com%2F(?P<profile_id>[^%]+)')


class SoundCloudResourceExtractor(ResourceExtractor):
    """Extracts ``soundcloud-profile:<id>`` resource URIs from a node."""

    def resources(self, node):
        """Return resource URIs for every SoundCloud profile in ``node``.

        Three sources are scanned, in order: link hrefs, ``movie`` params
        and text. Duplicates are not removed.
        """
        def url(m):
            return 'soundcloud-profile:%s' % m.group('profile_id')

        ret = []

        ret.extend([url(m) for m in HrefMatcher(node, HREF_URL)])
        ret.extend([url(m) for m in ParamMatcher(node, 'movie', EMBED_URL)])
        ret.extend([url(m) for m in TextMatcher(node, HREF_URL)])

        return ret
from fancyashow.extensions import ExtensionLibrary
from fancyashow.parsers.common.lastfm import LastFMParser
from fancyashow.extensions.models import Venue

extensions = ExtensionLibrary()


class RoselandBallroom(LastFMParser):
    """LastFMParser configuration for Roseland Ballroom."""

    def venue_id(self):
        """Return the numeric venue id (presumably the Last.fm id; verify)."""
        return 8780835

    def venue(self):
        """Return a Venue built from the display name and homepage URL."""
        return Venue('Roseland Ballroom', 'http://www.roselandballroom.com/')

    @classmethod
    def id(cls):
        """Return the dotted identifier string for this parser."""
        return 'us.ny.manhattan.roseland-ballroom'


# Register the parser with this module's extension library.
extensions.register_show_parser(RoselandBallroom)
from datetime import datetime
from copy import deepcopy
import logging

from fancyashow.extensions import ExtensionLibrary
from fancyashow.extensions.shows import ShowProcessor
from fancyashow.processing import ProcessorSetup
from fancyashow.util import artist_matcher

logger = logging.getLogger(__name__)
extensions = ExtensionLibrary()


class ArtistAssociation(ShowProcessor):
    """Show processor that links a show's artist entries to artist ids."""

    def process(self, show, state, dependent_states):
        """Fill in missing artist ids and store the resolved ids on the show.

        For each entry in ``show.artists`` without an ``artist_id``, the
        entry's name is looked up with ``artist_matcher.get_artist``; a
        truthy result supplies the id. ``show.artist_ids`` is then replaced
        with the ids of all entries that have one, in their original order.
        ``state`` and ``dependent_states`` are not used.
        """
        resolved_ids = []

        for entry in show.artists:
            if not entry.artist_id:
                matched = artist_matcher.get_artist(entry.name)

                if matched:
                    entry.artist_id = matched.id

            # Entries that could not be matched are left out of the id list.
            if entry.artist_id:
                resolved_ids.append(entry.artist_id)

        show.artist_ids = resolved_ids

    def cleanup(self, show, state):
        """Do nothing; this processor has no cleanup work."""
        pass
# encoding=UTF-8 import logging from fancyashow.extensions import ExtensionLibrary, ShowParser from fancyashow.extensions.models import Venue, Performer, Show from fancyashow.parsers.common.lastfm import LastFMParser from fancyashow.util import parsing as html_util from fancyashow.util import dates as date_util from fancyashow.util import lang as lang_util import re logger = logging.getLogger(__name__) extensions = ExtensionLibrary() class WebsterHall(LastFMParser): def venue_id(self): return 8778811 def venue(self): return Venue('Webster Hall', "http://www.websterhall.com/") @classmethod def id(cls): return 'us.ny.manhattan.webster-hall' class WebsterHallStudio(LastFMParser): def venue_id(self): return 9004895 def venue(self):
from fancyashow.extensions import ExtensionLibrary
from fancyashow.parsers.common.myspace import MyspaceParser
from fancyashow.extensions.models import Venue

extensions = ExtensionLibrary()


class CameoGallery(MyspaceParser):
    """MyspaceParser configuration for Cameo Gallery."""

    def venue(self):
        """Return a Venue built from the display name and profile URL."""
        return Venue('Cameo Gallery', 'http://www.myspace.com/cameogallery/')

    @classmethod
    def profile_id(cls):
        """Return the numeric profile id (presumably Myspace's; verify)."""
        return 459194474

    @classmethod
    def id(cls):
        """Return the dotted identifier string for this parser."""
        return 'us.ny.brooklyn.cameo-gallery'


# Register the parser with this module's extension library.
extensions.register_show_parser(CameoGallery)
import logging import re from datetime import date from fancyashow.parsers.common.google_calendar import GoogleCalendarParser from fancyashow.extensions import ExtensionLibrary, ShowParser from fancyashow.extensions.models import Venue, Performer, Show from fancyashow.util import parsing as html_util from fancyashow.util import dates as date_util from fancyashow.util import lang as lang_util extensions = ExtensionLibrary() logger = logging.getLogger(__name__) class Gutter(GoogleCalendarParser): BACK_ROOM_RE = re.compile('^back\s+room\s*:?\s*', re.I) def _process_recurring_entries(self, entries): return [] def _process_entry(self, entry): logger.debug("Processing entry: %s, starting on: %s" % (entry.title.text, entry.when[0].start_time)) if not self.BACK_ROOM_RE.match(entry.title.text): return None title_txt = self.BACK_ROOM_RE.sub('', entry.title.text) show = Show() show.venue = self.venue()
import re from fancyashow.extensions import ExtensionLibrary, ShowParser from fancyashow.extensions.models import Venue, Performer, Show from fancyashow.util import parsing as html_util from fancyashow.util import dates as date_util from fancyashow.util import lang as lang_util extensions = ExtensionLibrary() class BrooklynBowl(ShowParser): BASE_URL = "http://www.brooklynbowl.com/" CALENDAR_URL = "http://www.brooklynbowl.com/calendar/" IS_EVENT = re.compile("http://www.brooklynbowl.com/event/(?P<event_id>\d+)") def __init__(self, *args, **kwargs): super(BrooklynBowl, self).__init__(*args, **kwargs) self._parser = None def next(self): if not self._parser: self._parser = self._get_parser() while True: return self._parser.next() raise StopIteration def _get_parser(self): show_urls = html_util.get_show_urls(self.CALENDAR_URL, ".list-view", ".list-view-item", self.IS_EVENT)
import subprocess import logging from tempfile import TemporaryFile from fancyashow.extensions.artists import ArtistProcessor from fancyashow.util.storage import ChunkedFile from fancyashow.extensions import ExtensionLibrary from datetime import datetime from fancyashow.db.models import Audio, Video, MediaStatsHistory from fancyashow.extensions.models import AudioInfo, VideoInfo logger = logging.getLogger(__name__) extensions = ExtensionLibrary() class MediaExtractionProcessor(ArtistProcessor): def __init__(self, library, settings): super(MediaExtractionProcessor, self).__init__(library, settings) self.update_interval = self.get_required_setting(settings, "update_interval") @classmethod def default_state(self): return {"extractor_state": {}} def process(self, artist, state, dependent_states): new_state = self.clone_state(state) media = []
import logging import re from datetime import datetime from urllib2 import HTTPError from fancyashow.extensions import ExtensionLibrary, ShowParser from fancyashow.extensions.models import Venue, Performer, Show from fancyashow.util import parsing as html_util from fancyashow.util import dates as date_util from fancyashow.util import lang as lang_util logger = logging.getLogger(__name__) extensions = ExtensionLibrary() class CakeShop(ShowParser): BASE_URL = "http://cake-shop.com/" CALENDAR_RE = re.compile('%sshow-calendar/' % BASE_URL, re.IGNORECASE) DATE_RE = re.compile("\w+\s+(?P<day>\d+)") NUM_RE = re.compile('^\s*\d+\s*(?:st|nd|rd|th):\s*$', re.IGNORECASE | re.MULTILINE) TIME_RE = re.compile('^\s*(?P<time>\d+(?::\d+)?\s*(?:am|pm))\s*', re.IGNORECASE | re.MULTILINE) PRICE_OR_AGE = re.compile('(?:\$\d+|\d+\+)', re.IGNORECASE | re.MULTILINE) MONTHS_AHEAD = 3 def __init__(self, *args, **kwargs): super(CakeShop, self).__init__(*args, **kwargs) self._current_parser = None self._date_queue = self._request_dates() def _next_parser(self): if self._date_queue:
import re
import logging
from datetime import datetime, timedelta

from fancyashow.extensions import ExtensionLibrary, ShowParser
from fancyashow.extensions.models import Venue, Performer, Show
from fancyashow.util import parsing as html_util
from fancyashow.util import dates as date_util
from fancyashow.util import lang as lang_util

extensions = ExtensionLibrary()

logger = logging.getLogger(__name__)


class UnionHall(ShowParser):
    """ShowParser subclass targeting the unionhallny.com calendar.

    The underlying parser object is created lazily on the first call to
    ``next()``.
    """

    BASE_URL = "http://www.unionhallny.com/"
    CALENDAR_URL = "http://www.unionhallny.com/calendar.php"
    # Matches "<word> <month>/<day>:" and exposes the two numbers as groups.
    DATE_RE = re.compile("\w+ (?P<month>\d+)/(?P<day>\d+):")
    # Matches a colon, whitespace, then a clock time with an am/pm suffix
    # whose periods are optional.
    TIME_RE = re.compile(':\s+(?P<time>\d+(?:\s*:\s*\d+)?\s*[ap]\.?m\.?)\s*', re.IGNORECASE)

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ShowParser and start without a parser."""
        super(UnionHall, self).__init__(*args, **kwargs)
        self._parser = None

    def next(self):
        """Return the next item produced by the lazily built parser.

        On the first call the current time is stored in
        ``self._parse_started`` and the parser is obtained from
        ``self._get_parser()`` (not part of the visible code). Whatever
        ``self._parser.next()`` raises propagates to the caller unchanged.
        """
        if not self._parser:
            self._parse_started = datetime.now()
            self._parser = self._get_parser()

        return self._parser.next()