def get_date(self):
    """Parse the date field of the description into a ``datetime``.

    The raw text is obtained from ``lxml_utils.get_text_following_by_tag``
    for the ``em`` element labelled "Дата" in ``self.html``; each pattern in
    ``settings.date_formats`` is then tried in order.

    Returns:
        The ``datetime`` produced by the first matching format, or ``None``
        when the helper returns ``None`` or no format matches.
    """
    raw_date = lxml_utils.get_text_following_by_tag(self.html, 'em', "Дата")
    if raw_date is None:
        return None
    for date_format in settings.date_formats:
        try:
            # Parse straight into a datetime. Going through time.mktime() and
            # datetime.fromtimestamp() can raise OverflowError for dates the
            # platform cannot represent as a timestamp and is sensitive to
            # local DST rules.
            # NOTE(review): unlike the old round-trip, this keeps %f / %z
            # information if a configured format uses those directives.
            return datetime.strptime(raw_date, date_format)
        except ValueError:
            # This pattern does not match; try the next one.
            continue
    return None
def read(self):
    """Build an ``Album`` from the markdown description file.

    Reads ``self.description_path`` as UTF-8, renders it with ``markdown``,
    parses the result with ``lxml.html.fromstring`` and stores the tree in
    ``self.html``. The album, course, teacher and date fields are then
    extracted from that tree, and one product is created for every entry
    returned by ``self.get_drawings()``.

    Returns:
        The populated ``Album`` instance.

    Raises:
        Exception: if the album or course link is missing, the teacher
            field is missing or not present in ``settings.teachers_names``,
            or the date cannot be read.
    """
    with open(self.description_path, 'r', encoding='utf-8') as description_file:
        markdown_source = description_file.read()
    self.html = lxml.html.fromstring(markdown(markdown_source))

    # Album: first link inside an <h1>.
    try:
        album_title = self.html.xpath('.//h1/a/text()')[0]
        album_uri = self.html.xpath('.//h1/a/@href')[0]
    except IndexError as err:
        raise Exception('Cannot read album title/uri') from err

    # Course: first link inside a <p>.
    try:
        course_title = self.html.xpath('.//p/a/text()')[0]
        course_uri = self.html.xpath('.//p/a/@href')[0]
    except IndexError as err:
        raise Exception('Cannot read course title/uri') from err

    # Teacher: looked up by the whitespace-free form of the field text.
    teacher = lxml_utils.get_text_following_by_tag(self.html, 'em', "Преподаватель")
    if teacher is None:
        # get_date() treats None as a possible result of this helper; fail
        # with a clear message instead of a TypeError from re.sub().
        raise Exception('Teacher field is empty')
    teacher = re.sub(r'\s', '', teacher)
    if teacher not in settings.teachers_names:
        raise Exception('Teacher %s is not found' % teacher)
    teacher_name = settings.teachers_names[teacher]

    # Date
    date = self.get_date()
    if date is None:
        raise Exception('Date field is empty')

    # Products
    drawings = self.get_drawings()
    album = Album(type='1', title=album_title, uri=album_uri,
                  course=course_title, course_uri=course_uri,
                  date=date, teacher=teacher_name, comment=None)
    for author_name, author_age, filename, product_title in drawings:
        product = self.create_product(album, author_name, author_age,
                                      filename, product_title)
        product.image_original_path = os.path.join(self.dir_path, filename)
        album.add_product(product)
    return album