Example #1
# Imports assumed by this snippet (not shown in the original excerpt); `Helper`,
# `archive_format`, and `output_dir` are defined elsewhere in the project.
import logging
import os

from bs4 import BeautifulSoup


def handle_download(download_page, params):
	"""Original function"""

	global output_dir
	print(params)

	filename = archive_format(params) + ".pdf"
	# Transliterate the Greek issue-series abbreviations into Latin names
	# used for the output directory layout.
	volumes = {
		'Α': 'A',
		'Β': 'B',
		'Γ': 'C',
		'Δ': 'D',
		'Α.ΕΙ.Δ.': 'A.EI.D',
		'Α.Σ.Ε.Π.': 'A.S.E.P',
		'Δ.Δ.Σ.': 'D.D.S',
		'Α.Π.Σ.': 'A.P.S',
		'Υ.Ο.Δ.Δ.': 'Y.O.D.D',
		'Α.Α.Π.': 'A.A.P',
		'Ν.Π.Δ.Δ.': 'N.P.D.D',
		'ΠΑΡΑΡΤΗΜΑ': 'APPENDIX',
		'Δ.Ε.Β.Ι.': 'D.E.B.I',
		'ΑΕ-ΕΠΕ': 'AE-EPE',
		'Ο.Π.Κ.': 'O.P.K',
	}
	vol = volumes[params['issue_type']]
	year = params['issue_date'].year

	dirs = '{}/{}/{}'.format(vol, year, filename[6:9])
	os.makedirs('{}/{}'.format(output_dir, dirs), exist_ok=True)
	outfile = '{}/{}/{}'.format(output_dir, dirs, filename)

	if os.path.isfile(outfile):
		logging.info('{} already exists, skipping'.format(filename))
		return

	try:
		# First we get the redirect link from the download page
		html = Helper.get_url_contents(download_page)
		beautiful_soup = BeautifulSoup(html, "html.parser")
		meta = beautiful_soup.find("meta", {"http-equiv": "REFRESH"})
		download_link = meta['content'].replace("0;url=", "")

		# We do the same process twice because it involves 2 redirects.
		beautiful_soup = BeautifulSoup(
			Helper.get_url_contents(download_link), "html.parser")
		meta = beautiful_soup.find("meta", {"http-equiv": "REFRESH"})
		download_link = meta['content'].replace("0;url=", "")
	except Exception as e:
		# Catch Exception rather than BaseException so Ctrl-C and SystemExit still propagate.
		logging.error("Exception occurred while processing a link", exc_info=True)
		print(e)
		return None
	print(filename)
	Helper.download(download_link, filename, output_dir + '/' + dirs)
	# Log success only after the download call has actually run.
	logging.info('Downloaded {}'.format(filename))
	return outfile
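
For context, a minimal sketch of how this function might be invoked. The `params` keys `issue_type` and `issue_date` are the ones read in the body above; the URL, the date value, and any extra keys consumed by `archive_format` are hypothetical placeholders, and `output_dir` is assumed to be set at module level.

import datetime

# Hypothetical invocation; URL and values are placeholders only.
output_dir = 'downloads'

params = {
    'issue_type': 'Α',                        # selects the volume directory
    'issue_date': datetime.date(2016, 3, 1),  # only .year is used here
    # ...plus whatever fields archive_format() needs to build the filename.
}

result = handle_download('https://example.org/fek/download?id=123', params)
if result is None:
    print('Download failed or was skipped')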
Example #2
# This variant relies on the same dependencies as Example #1 (os, bs4's
# BeautifulSoup, Helper, archive_format, output_dir); it keeps only two
# volume mappings and prints instead of logging.
def handle_download(download_page, params):
    """Original function"""

    global output_dir
    print(params)

    filename = archive_format(params) + ".pdf"
    volumes = {'Α': 'A', 'Β': 'B'}
    vol = volumes[params['issue_type']]
    year = params['issue_date'].year

    dirs = '{}/{}/{}'.format(vol, year, filename[6:9])
    os.makedirs('{}/{}'.format(output_dir, dirs), exist_ok=True)
    outfile = '{}/{}/{}'.format(output_dir, dirs, filename)

    if os.path.isfile(outfile):
        print('{} already exists, skipping'.format(filename))
        return

    try:
        # First we get the redirect link from the download page
        html = Helper.get_url_contents(download_page)
        beautiful_soup = BeautifulSoup(html, "html.parser")
        meta = beautiful_soup.find("meta", {"http-equiv": "REFRESH"})
        download_link = meta['content'].replace("0;url=", "")

        # We do the same process twice because it involves 2 redirects.
        beautiful_soup = BeautifulSoup(Helper.get_url_contents(download_link),
                                       "html.parser")
        meta = beautiful_soup.find("meta", {"http-equiv": "REFRESH"})
        download_link = meta['content'].replace("0;url=", "")
    except Exception as e:
        print(e)
        return None
    print(filename)
    Helper.download(download_link, filename, output_dir + '/' + dirs)
    return filename
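
Both variants resolve the two meta-refresh redirects with the same inlined code. As a hedged sketch, not part of the original source, that step could be factored into a small helper; `resolve_meta_refresh` is a hypothetical name, and it reuses the project's `Helper.get_url_contents` seen above.

from bs4 import BeautifulSoup


def resolve_meta_refresh(url):
    """Return the URL a meta-refresh page points to, or None if absent."""
    soup = BeautifulSoup(Helper.get_url_contents(url), "html.parser")
    meta = soup.find("meta", {"http-equiv": "REFRESH"})
    if meta is None or 'content' not in meta.attrs:
        return None
    # The content attribute looks like "0;url=<target>".
    return meta['content'].replace("0;url=", "")


# The body of the try-block above could then reduce to:
#     download_link = resolve_meta_refresh(download_page)
#     download_link = resolve_meta_refresh(download_link)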