def parse_classes(self, response):
    """Parse studio class listings out of the page's text columns.

    Scans every ``div.wpb_text_column`` whose text mentions BEGINNING or
    ADVANCED, and for each ``<p>`` row shaped like
    ``<time range> – <style> (<teacher>)`` builds a StudioClass item.
    Yields each valid item, expanded via ``_repeated_items_iterator``.
    """
    logging.info(response)
    for col in response.css('div.wpb_text_column'):
        col_text = self._extract_text(col)
        if 'BEGINNING' in col_text or 'ADVANCED' in col_text:
            for row in col.css('p'):
                text = self._extract_text(row)
                # Rows of interest contain an en-dash separator and a
                # clock time such as "7:30".
                if u'–' in text and re.search(r'\d:\d\d', text):
                    # Renamed from `datetime` to avoid shadowing the module name.
                    when, class_details = text.split(u'–', 1)
                    teacher_match = re.search(r'\((.+?)\)', class_details.strip())
                    if teacher_match:
                        teacher = teacher_match.group(1)
                    else:
                        logging.error('Could not find teacher: %s', class_details)
                        teacher = ''
                    # re.escape: teacher names may contain regex
                    # metacharacters (e.g. "J. Smith").
                    class_details = re.sub(r'\(%s\)' % re.escape(teacher), '', class_details)
                    item = items.StudioClass()
                    item['style'] = class_details.title()
                    item['teacher'] = teacher.title()
                    # do we care?? row[4]
                    start_time, end_time = parse_times(when.strip())
                    item['start_time'] = start_time
                    item['end_time'] = end_time
                    if self._valid_item(item):
                        for new_item in self._repeated_items_iterator(item):
                            yield new_item
def parse_meeting_schedule(filename):
    """Parse a saved assembly schedule HTML page into meeting records.

    filename: path to a saved HTML file readable by ``get_webpage``.

    Returns a zip of (date, time, type, title, session, sitting,
    committee, link) tuples. Rows whose title lacks a session/sitting
    marker would raise IndexError from ``findall(...)[0]``.
    """
    date_length = len('0000-00-00') + 1
    # Korean ordinal markers: 제N회 = Nth session, 제N차 = Nth sitting.
    session_re = re.compile(u'제(?P<session>[0-9]+)회')
    sitting_re = re.compile(u'제(?P<sitting>[0-9]+)차')
    with open(filename, 'r') as f:
        p = get_webpage(f)
    raw_titles = p.xpath(xpath_title)
    link_params = p.xpath(xpath_link_params)
    datetimes = p.xpath(xpath_datetime)
    committes = p.xpath(xpath_committee)
    # Drop blank datetime cells.
    datetimes = [dt for dt in datetimes if dt.strip() != '']
    # Strip the JS wrapper "jsDetail(...);return false;" down to its arguments.
    link_params = [lp.replace('jsDetail(', '').replace(');return false;', '')
                   for lp in link_params]
    # Each datetime cell starts with a "0000-00-00" date; the rest is the time.
    dates = [dt[:date_length].strip() for dt in datetimes]
    times = [dt[date_length:].strip() for dt in datetimes]
    # Title format: "[type] title ...".
    types = [title[title.find('[') + 1:title.find(']')] for title in raw_titles]
    titles = [title[title.find(']') + 2:] for title in raw_titles]
    sessions = [session_re.findall(title)[0] for title in titles]
    sittings = [sitting_re.findall(title)[0] for title in titles]
    # SECURITY: eval() on text scraped from the page executes arbitrary code
    # if the page is attacker-controlled; the arguments should be parsed
    # (e.g. ast.literal_eval) and passed to get_link_url() directly.
    # Kept as-is for behavior compatibility — flagged for follow-up.
    links = [eval('get_link_url(%s)' % lp) for lp in link_params]
    return zip(dates, times, types, titles, sessions, sittings, committes, links)
def parse_meeting_schedule(filename):
    """Parse a saved assembly schedule HTML page into meeting records.

    NOTE(review): this is an identically-named duplicate of another
    `parse_meeting_schedule` in this file (this copy is black-formatted);
    the later definition wins at import time — confirm which is intended.

    Returns a zip of (date, time, type, title, session, sitting,
    committee, link) tuples.
    """
    # Dates look like "0000-00-00"; +1 skips the separator before the time.
    date_length = len("0000-00-00") + 1
    # Korean ordinal markers: 제N회 = Nth session, 제N차 = Nth sitting.
    session_re = re.compile(u"제(?P<session>[0-9]+)회")
    sitting_re = re.compile(u"제(?P<sitting>[0-9]+)차")
    with open(filename, "r") as f:
        p = get_webpage(f)
    raw_titles = p.xpath(xpath_title)[0:]
    link_params = p.xpath(xpath_link_params)[0:]
    datetimes = p.xpath(xpath_datetime)[0:]
    committes = p.xpath(xpath_committee)[0:]
    # Drop blank datetime cells.
    datetimes = [datetime for datetime in datetimes if datetime.strip() != ""]
    # Strip the JS wrapper "jsDetail(...);return false;" down to its arguments.
    link_params = [link_param.replace("jsDetail(", "").replace(");return false;", "") for link_param in link_params]
    dates = [datetime[:date_length].strip() for datetime in datetimes]
    times = [datetime[date_length:].strip() for datetime in datetimes]
    # Title format: "[type] title ..." — split on the brackets.
    types = [title[title.find("[") + 1 : title.find("]")] for title in raw_titles]
    titles = [title[title.find("]") + 2 :] for title in raw_titles]
    sessions = [session_re.findall(title)[0] for title in titles]
    sittings = [sitting_re.findall(title)[0] for title in titles]
    # WARNING: eval() on scraped page text is a code-execution risk if the
    # page is attacker-controlled.
    links = [eval("get_link_url(%s)" % link_param) for link_param in link_params]
    return zip(dates, times, types, titles, sessions, sittings, committes, links)
def fdate(finfo):
    """Extract (year, month, day, hour) from a file-info triple.

    finfo: (fname, desc, (oper, stamp)) where stamp looks like
    'Wed May 12 15:07:34 2010 '.

    Returns (year_str, month_int, day_int, time_str), or four empty
    strings when the stamp has fewer than five fields.

    Bug fixed: the original one-line conditional parsed as
    ``((tuple) if cond else ''), '', '', ''`` because the ternary binds
    tighter than the tuple commas — on the success path `year` received
    the whole 4-tuple and month/day/hour were always ''.
    """
    fname, desc, op_time = finfo
    # Renamed from `datetime` to avoid shadowing the module name.
    oper, stamp = op_time
    fields = stamp.strip().split()
    if len(fields) > 4:
        return (fields[4],
                time.strptime(fields[1], '%b').tm_mon,  # 'May' -> 5
                int(fields[2]),
                fields[3])
    return '', '', '', ''
def deal_publish_time(publish_time_couple):
    """Build a 'YYYY-MM-DD HH:MM:SS' timestamp for a tibet.net article.

    publish_time_couple: (publish_time_list, url) — the day of month comes
    from publish_time_list[0]; the year and month come from the URL path
    after 'tibet.net/'.

    Returns the fallback '2018-02-01 00:00:00' when the day is missing or
    the URL does not carry a parsable year/month path.
    """
    publish_time_list, url = publish_time_couple
    if not publish_time_list:
        return '2018-02-01 00:00:00'
    day = str(publish_time_list[0]).strip()
    try:
        # URL shape: ...tibet.net/<year>/<month>/<slug>/
        path = url.split('tibet.net/')[1]
        segments = path.strip('/').split('/')
        year, month = segments[0], segments[1]  # 'mounth' typo fixed locally
        return '%s-%s-%s 00:00:00' % (year, month, day)
    except (IndexError, AttributeError):
        # Narrowed from a bare except: URL missing the marker (IndexError)
        # or not a string (AttributeError) falls back to the default date.
        return '2018-02-01 00:00:00'
async def _tz(self, ctx, datetime):
    """Converts date and time to multiple timezones.

    `datetime` is the raw command text (a string), not a datetime object —
    the name shadows any imported `datetime` within this method. Recognized
    forms: "help", "tz" (list identifiers), "me [timezone]" (view/set the
    caller's personal timezone), or a date/time string to convert.
    Returns the reply message as a string.
    """
    usertimezones = await self._config.guild(ctx.guild).usertimezones()
    # Lazily load the guild's timezone list on first use.
    if len(self._timezoneids) == 0:
        showutc = await self._config.guild(ctx.guild).showutc()
        self._timezoneids = await self._config.guild(ctx.guild).timezones()
        if showutc:
            # UTC is shown first in the converted list.
            self._timezoneids.insert(0, "UTC")
    txt = datetime.strip()
    msg: str | None = None
    if txt == "help":
        msg = self._help()
    elif txt == "tz":
        msg = self._avtzs()
    elif txt[0:2] == "me":
        # "me"            -> show the caller's stored timezone
        # "me <timezone>" -> set it
        if not usertimezones:
            return "Personal timezones are not enabled"
        tmp = txt.split(" ")
        if len(tmp) == 1:
            msg = await usercfg.get_user_tz(ctx, self._config, self._tzs)
        elif len(tmp) == 2:
            msg = await usercfg.set_user_tz(ctx, self._config, self._tzs, tmp[1])
        else:
            msg = "Error in syntax. Try `[p]tz me <timezone>` to view or set your timezone"
    else:
        # Anything else is treated as a date/time string to convert into
        # the configured timezones; conversion errors map to user messages.
        try:
            tzid = await usercfg.get_user_tzid(ctx, self._config)
            msg = self._doConversion(txt, tzid)
        except error.TimezoneNotFoundError:
            msg = "The timezone identifier was not found. Please have a look at `[p]tz tz` for valid identifiers."
        except error.ParsingError:
            msg = "Unable to extract date and/or time. Please have a look at `[p]tz help` for help on formatting."
        except error.DateError as e:
            msg = str(e)
        except error.TimeError as e:
            msg = str(e)
        except error.Error:
            msg = "Uh oh, something went wrong."
    return msg
def blog_datetime(content):
    """Return the last three whitespace-separated tokens of `content`,
    joined by single spaces (the trailing date/time of a blog line).

    Fixed: the original negative-index loop duplicated tokens when fewer
    than three were present (e.g. 'a b' -> 'b a b') and raised IndexError
    on empty input; slicing returns whatever tokens exist.
    """
    return ' '.join(content.split()[-3:])
def fyear(finfo):
    """Return the year — the last whitespace token of the operation
    timestamp — from a (fname, desc, (oper, stamp)) file-info triple."""
    _fname, _desc, (_oper, stamp) = finfo
    tokens = stamp.strip().split()
    return tokens[-1]