def pubdates(self):
    """Collect the publication dates found under article-meta.

    Walks ``front -> article-meta -> pub-date`` in ``self.dom`` (only direct
    children are inspected below each ``front`` element) and builds one dict
    per ``pub-date`` element.

    Returns:
        A list of dicts shaped like
        ``{'pub-type': 'val', 'pub-date': 'yyyy-mm-dd'}``. Missing or empty
        ``year`` / ``month`` / ``day`` children fall back to ``'1900'`` /
        ``'01'`` / ``'01'``.
    """
    # author : saranya
    # NOTE(review): a second, identical definition of pubdates follows this
    # one in the file; if both live in the same scope the later one wins.
    # NOTE(review): the original text was flattened onto a single line; the
    # date_format_helper call is reconstructed as running after each
    # try/except (for both parsed and default values) -- confirm.
    helper = ParserHelper()
    fallback = {'year': '1900', 'month': '01', 'day': '01'}
    # Only month and day are passed through the helper; year is used as read.
    # presumably date_format_helper zero-pads the value -- verify in ParserHelper.
    formatted_fields = ('month', 'day')
    collected = []

    for front_node in self.dom.getElementsByTagName('front'):
        for meta_node in front_node.childNodes:
            if meta_node.nodeName != 'article-meta':
                continue
            for pub_node in meta_node.childNodes:
                if pub_node.nodeName != 'pub-date':
                    continue

                entry = {'pub-type': pub_node.getAttribute('pub-type')}
                parts = dict(fallback)

                for part_node in pub_node.childNodes:
                    field = part_node.nodeName
                    if field not in parts:
                        continue
                    try:
                        text = part_node.firstChild.data
                    except AttributeError:
                        # No first child, or a first child without ``data``.
                        text = fallback[field]
                    if field in formatted_fields:
                        text = helper.date_format_helper(text)
                    parts[field] = text

                entry['pub-date'] = '-'.join(
                    (parts['year'], parts['month'], parts['day']))
                collected.append(entry)

    return collected
def pubdates(self):
    """Collect the publication dates found under article-meta.

    Walks ``front -> article-meta -> pub-date`` in ``self.dom`` (only direct
    children are inspected below each ``front`` element) and builds one dict
    per ``pub-date`` element.

    Returns:
        A list of dicts shaped like
        ``{'pub-type': 'val', 'pub-date': 'yyyy-mm-dd'}``. Missing or empty
        ``year`` / ``month`` / ``day`` children fall back to ``'1900'`` /
        ``'01'`` / ``'01'``.
    """
    # author : saranya
    # NOTE(review): an identical definition of pubdates appears immediately
    # before this one in the file; if both live in the same scope this later
    # one is the definition that takes effect.
    # NOTE(review): the original text was flattened onto a single line; the
    # date_format_helper call is reconstructed as running after each
    # try/except (for both parsed and default values) -- confirm.
    helper = ParserHelper()
    fallback = {'year': '1900', 'month': '01', 'day': '01'}
    # Only month and day are passed through the helper; year is used as read.
    # presumably date_format_helper zero-pads the value -- verify in ParserHelper.
    formatted_fields = ('month', 'day')
    collected = []

    for front_node in self.dom.getElementsByTagName('front'):
        for meta_node in front_node.childNodes:
            if meta_node.nodeName != 'article-meta':
                continue
            for pub_node in meta_node.childNodes:
                if pub_node.nodeName != 'pub-date':
                    continue

                entry = {'pub-type': pub_node.getAttribute('pub-type')}
                parts = dict(fallback)

                for part_node in pub_node.childNodes:
                    field = part_node.nodeName
                    if field not in parts:
                        continue
                    try:
                        text = part_node.firstChild.data
                    except AttributeError:
                        # No first child, or a first child without ``data``.
                        text = fallback[field]
                    if field in formatted_fields:
                        text = helper.date_format_helper(text)
                    parts[field] = text

                entry['pub-date'] = '-'.join(
                    (parts['year'], parts['month'], parts['day']))
                collected.append(entry)

    return collected