예제 #1
0
 def pubdates(self):
     """Collect the publication dates found in the article front matter.

     Walks article -> front -> article-meta -> pub-date in ``self.dom``
     and returns a list with one dict per ``pub-date`` element, in the
     order the elements are encountered::

         {'pub-type': <value of the pub-type attribute>,
          'pub-date': <year> + '-' + <month> + '-' + <day>}

     Defaults are '1900' for the year and '01' for month and day; they
     are used when the child element is absent or has no readable text.
     Month and day text is passed through
     ``ParserHelper.date_format_helper`` (presumably zero-padding to get
     a yyyy-mm-dd string -- confirm against the helper); the year is
     used as found.
     """
     # author : saranya
     ph = ParserHelper()

     def text_or(node, fallback):
         # An element without a first child (firstChild is None), or
         # whose first child has no ``data``, raises AttributeError.
         try:
             return node.firstChild.data
         except AttributeError:
             return fallback

     collected = []
     for front in self.dom.getElementsByTagName('front'):
         for meta in front.childNodes:
             if meta.nodeName != 'article-meta':
                 continue
             for pub_date in meta.childNodes:
                 if pub_date.nodeName != 'pub-date':
                     continue
                 entry = {'pub-type': pub_date.getAttribute('pub-type')}
                 year, month, day = '1900', '01', '01'
                 # When a part appears more than once, the last one wins.
                 for part in pub_date.childNodes:
                     tag = part.nodeName
                     if tag == 'month':
                         month = ph.date_format_helper(text_or(part, '01'))
                     elif tag == 'day':
                         day = ph.date_format_helper(text_or(part, '01'))
                     elif tag == 'year':
                         year = text_or(part, '1900')
                 entry['pub-date'] = year + '-' + month + '-' + day
                 collected.append(entry)
     return collected
예제 #2
0
	def pubdates(self):
		"""Return the article's publication dates as a list of dicts.

		Path followed in ``self.dom``:
		article -> front -> article-meta -> pub-date.

		Each ``pub-date`` element yields
		``{'pub-type': <pub-type attribute>, 'pub-date': 'year-month-day'}``.
		A part that is absent, or whose text cannot be read, falls back to
		'1900' (year) or '01' (month, day). Month and day values go through
		``ParserHelper.date_format_helper`` before being joined (presumably
		to normalise them to two digits -- verify in the helper); the year
		does not.
		"""
		# author : saranya
		helper = ParserHelper()
		fallbacks = {'year': '1900', 'month': '01', 'day': '01'}

		# Gather every <pub-date> sitting directly under an <article-meta>
		# that is itself a direct child of a <front> element.
		pub_date_nodes = [
			candidate
			for front in self.dom.getElementsByTagName('front')
			for meta in front.childNodes
			if meta.nodeName == 'article-meta'
			for candidate in meta.childNodes
			if candidate.nodeName == 'pub-date'
		]

		dates = []
		for node in pub_date_nodes:
			pub_type = node.getAttribute('pub-type')
			parts = dict(fallbacks)
			for child in node.childNodes:
				name = child.nodeName
				if name not in fallbacks:
					continue
				try:
					text = child.firstChild.data
				except AttributeError:
					# No first child, or a first child without ``data``.
					text = fallbacks[name]
				if name != 'year':
					text = helper.date_format_helper(text)
				# A repeated part overwrites the earlier value.
				parts[name] = text
			stamp = parts['year'] + '-' + parts['month'] + '-' + parts['day']
			dates.append({'pub-type': pub_type, 'pub-date': stamp})
		return dates