def lambda_handler(event, context):
    """Reduce step: merge every mapper's word counts and upload the total.

    The number of mapper outputs to merge is taken from the first run of
    digits in the S3 object key found in ``event``. The shelves
    ``MapResult0.shlf`` .. ``MapResult<n-1>.shlf`` are downloaded to ``/tmp``,
    their dictionaries are summed key by key, and the merged dictionary is
    stored under the key ``'Reduce'`` in ``Reduce.shlf``, which is uploaded
    to the same bucket.

    Args:
        event: S3 notification; only ``Records[0]['s3']['object']['key']``
            is read.
        context: Lambda context object (unused).

    Returns:
        The string ``'Reducer done!'``.

    Raises:
        IndexError: if the object key contains no digit.
    """
    import re  # local import: this block may be applied without other edits

    arxiu = 'MapResult'
    bucket_name = "mapreducerenricalvaro"

    # Number of mappers. The whole first run of digits is used so that ten or
    # more mappers are not truncated to their first digit.
    key = event['Records'][0]['s3']['object']['key']
    n = int(re.findall('[0-9]+', key)[0])

    # Download every mapper result and load its dictionary.
    dictionaries = []
    for i in range(n):
        nom = arxiu + str(i)
        download_file(nom + ".shlf", bucket_name)
        shelf = shelve.open("/tmp/" + nom + '.shlf')
        try:
            dictionaries.append(shelf[nom])
        finally:
            # Always release the shelf, even if the expected key is missing.
            shelf.close()

    time1 = time.time()

    # Reduce: add the counters together. Starting from an empty Counter makes
    # the zero-mapper case yield an empty result instead of raising.
    # Counter addition keeps only positive counts.
    reduct = dict(sum((Counter(d) for d in dictionaries), Counter()))
    print("Temps Reducer" + str(time.time() - time1))

    # Write the merged result to disk, serialized.
    arxiu_pujar = 'Reduce'
    shelf = shelve.open("/tmp/" + arxiu_pujar + ".shlf")
    try:
        shelf[arxiu_pujar] = reduct
    finally:
        shelf.close()

    # Upload the result file.
    upload_file(arxiu_pujar + ".shlf", bucket_name)

    return 'Reducer done!'
def lambda_handler(event, context):
    """Map step: count the words of one input chunk and upload the counts.

    The mapper index is the first run of digits in the S3 object key found in
    ``event``. ``Map<n>.txt`` is downloaded to ``/tmp``, every character that
    is not a space, ASCII letter or digit is replaced by a space, and the
    remaining whitespace-separated words are counted. The resulting
    dictionary is stored under the key ``'MapResult<n>'`` in
    ``MapResult<n>.shlf``, which is uploaded to the same bucket.

    Args:
        event: S3 notification; only ``Records[0]['s3']['object']['key']``
            is read.
        context: Lambda context object (unused).

    Returns:
        The string ``'Mapper <n> done!'``.

    Raises:
        IndexError: if the object key contains no digit.
    """
    # Mapper index. The whole first run of digits is used so that indexes of
    # 10 and above are not truncated to their first digit.
    key = event['Records'][0]['s3']['object']['key']
    n = re.findall('[0-9]+', key)[0]

    arxiu = "Map" + str(n)
    bucket_name = "mapreducerenricalvaro"

    # Download the input chunk and read it, closing the file afterwards.
    download_file(arxiu + ".txt", bucket_name)
    with open("/tmp/" + arxiu + ".txt", 'r') as f:
        text = f.read()

    a = re.sub('[^ a-zA-Z0-9]', ' ', text).split()

    result = {}
    time1 = time.time()
    for paraula in a:
        result[paraula] = result.get(paraula, 0) + 1
    print("TEMPS MAP" + str(n) + ": " + str(time.time() - time1))

    # Write the counts to disk, serialized.
    arxiu_pujar = 'MapResult' + str(n)
    shelf = shelve.open("/tmp/" + arxiu_pujar + ".shlf")
    try:
        shelf[arxiu_pujar] = result
    finally:
        shelf.close()

    # Upload the result file.
    upload_file(arxiu_pujar + ".shlf", bucket_name)

    return 'Mapper ' + str(n) + ' done!'
def charge_from_ODS(workspace,coverage,base_format='GTFS'):
    """Download a coverage's data archive from the Navitia OpenDataSoft portal.

    Queries the catalog for the record matching ``<coverage>-<base_format>``,
    reads the download id of the first record returned, and saves the
    corresponding file as ``<workspace><coverage>.zip``.

    Args:
        workspace: Path prefix the archive name is appended to (no separator
            is inserted).
        coverage: Dataset name, also used as the archive's base name.
        base_format: Format suffix used in the catalog query.
    """
    portal = 'https://navitia.opendatasoft.com'

    # Look up the record describing the file to download.
    catalog_url = ''.join([
        portal, '/api/v2/catalog/datasets/', coverage,
        '/records?q=', coverage, '-', base_format,
    ])
    data = charge_json(catalog_url)
    print(data)

    first_record = data['records'][0]
    id_to_download = first_record['record']['fields']['download']['id']
    print (id_to_download)

    # Fetch the file itself.
    file_url = ''.join([
        portal, '/explore/dataset/', coverage,
        '/files/', id_to_download, '/download/',
    ])
    target = workspace + coverage + '.zip'
    download_file(file_url, target)
def download_VG_many (cart_id, map_name, storing_name, number):
    """Download the cartoons of the last ``number`` days, newest first.

    Starts with yesterday and walks backwards one day at a time, stopping
    early at the first download for which ``functions.download_file``
    returns 0. Files are named
    ``<directory><storing_name>_<YYYY-MM-DD>_VG.jpg`` where ``directory`` is
    ``"../Downloaded_cartoons/" + map_name`` (no separator is inserted
    between the directory and the file name).

    Args:
        cart_id: Path fragment inserted in the URL right before the year.
        map_name: Sub-directory name appended to ``../Downloaded_cartoons/``.
        storing_name: Prefix of every stored file name.
        number: Maximum number of days (pictures) to fetch.
    """
    # Specify where to store the files.
    directory = "../Downloaded_cartoons/" + map_name
    functions.make_dir(directory)  # creates the directory if it is missing

    # Taken once so that a run crossing midnight cannot request a day twice.
    today = date.today()

    # Counted explicitly: the loop index is one short after a complete run
    # and does not exist at all when number == 0.
    downloaded = 0
    for i in range(number):
        # Make a new url
        date_ = str(today - timedelta(int(i+1)))
        td = date_.split('-')
        url_string_array = ["http://www.heltnormalt.no/img/",cart_id,td[0],"/",td[1],"/",td[2],".jpg"]
        picture_url = ''.join(url_string_array)

        # Make a new name for the file with timestamp
        name_string_array = [directory,storing_name,"_",date_,"_VG",".jpg"]
        tag = ''.join(name_string_array)

        # Download the picture; a return value of 0 ends the run.
        if functions.download_file(picture_url, tag) == 0:
            break
        downloaded += 1

        if downloaded % 10 == 0:
            print ("finished with picture # " , downloaded)

    print ("*** Finished " + storing_name + ". Downloaded " + str(downloaded) + " pictures")
    return
def main(argv):
    """Run one S3 operation chosen with ``-o <option>`` / ``--option <option>``.

    Recognised options are ``create_bucket``, ``upload_file``,
    ``delete_file``, ``download_file`` and ``get_files``; any other value is
    ignored. The file operations act on ``boto3.jpg`` in the bucket
    ``opensouthtest``.

    Args:
        argv: Command-line arguments without the program name.

    Exits with status 2 when the arguments cannot be parsed or no option is
    given, and with status 0 after printing the usage for ``-h``.
    """
    keys = Keys()
    _access_key = keys._access_key
    _secret_access_key = keys._secret_access_key
    _region = 'eu-west-1'

    try:
        # "option=" declares --option as taking a value. The previous spec
        # embedded the list of choices in the option name, which made
        # --option impossible to match.
        opts, args = getopt.getopt(argv, "hi:o:", ["option="])
    except getopt.GetoptError:
        print('manace_ec2.py -o <option> ')
        sys.exit(2)

    option = None
    for opt, arg in opts:
        if opt == '-h':
            print('test.py -o <option> ')
            sys.exit()
        elif opt in ("-o", "--option"):
            option = arg

    # Without -o there is nothing to do; report usage instead of failing
    # later on an unbound name.
    if option is None:
        print('manace_ec2.py -o <option> ')
        sys.exit(2)

    s3_client = boto3.client('s3',
                             aws_access_key_id=_access_key,
                             aws_secret_access_key=_secret_access_key,
                             region_name=_region)

    if option == 'create_bucket':
        f.create_bucket(s3_client)
    elif option == 'upload_file':
        f.upload_file(s3_client=s3_client, file='boto3.jpg', bucket='opensouthtest')
    elif option == 'delete_file':
        f.delete_file(s3_client=s3_client, file='boto3.jpg', bucket='opensouthtest')
    elif option == 'download_file':
        f.download_file(s3_client=s3_client, file='boto3.jpg', bucket='opensouthtest')
    elif option == 'get_files':
        f.get_files(s3_client=s3_client, bucket='opensouthtest')
def download_VG (cart_id, storing_name):
    """Download today's cartoon and store it with a dated file name.

    The picture is fetched from
    ``http://www.heltnormalt.no/img/<cart_id><YYYY>/<MM>/<DD>.jpg`` and saved
    as ``<directory><storing_name><YYYY-MM-DD>_VG.jpg``, where ``directory``
    is read from the enclosing module.

    Args:
        cart_id: Path fragment inserted in the URL right before the year.
        storing_name: Prefix of the stored file name.
    """
    today = str(datetime.date.today())
    year, month, day = today.split('-')

    # Source URL of today's picture.
    picture_url = "http://www.heltnormalt.no/img/" + cart_id + year + "/" + month + "/" + day + ".jpg"

    # Destination file name, stamped with today's date.
    tag = directory + storing_name + today + "_VG" + ".jpg"

    functions.download_file(picture_url, tag)
    print ("Finished " + tag)
    return
def download_DB (url, cart_id, storing_name):
    """Download the picture embedded in a web page and store it with a date.

    The page at ``url`` is fetched and parsed, the element whose ``id`` is
    ``cart_id`` is located, and the file its ``src`` attribute points to is
    saved as ``<directory><storing_name><YYYY-MM-DD>_DB.jpg``, where
    ``directory`` is read from the enclosing module.

    Args:
        url: Address of the HTML page containing the picture.
        cart_id: ``id`` attribute of the element holding the picture.
        storing_name: Prefix of the stored file name.

    Raises:
        urllib.error.URLError: if the page cannot be fetched (HTTP error
            statuses raise its subclass ``HTTPError``).
        TypeError: if no element with id ``cart_id`` exists in the page.
    """
    # urlopen replaces FancyURLopener, which is deprecated since Python 3.3
    # (and removed in Python 3.14). The with-block closes the connection
    # once the page has been read.
    with urllib.request.urlopen(url) as f:
        html_code = f.read()
    soup = BS(html_code)

    # Find the url to the right picture
    picture = soup.find(id=cart_id)
    picture_url = picture['src']

    # Make a new name for the file with timestamp
    today = str(datetime.date.today())
    string_array = [directory,storing_name,today,"_DB",".jpg"]
    tag = ''.join(string_array)

    # Download the picture
    functions.download_file(picture_url, tag)
    print ("Finished " + tag)
    return
import functions
import os

# Create the style, javascript, images and other directories if they are
# not there yet.
for folder in ("style", "javascript", "images", "other"):
    if not os.path.isdir(folder):
        os.mkdir(folder)

# Ask for the page to process, download it, then hand it over together with
# the downloader to get_fileslinks.
url = raw_input("Enter the url of the file you wish to download and process\n")
filename = functions.download_file(url)
functions.get_fileslinks(functions.download_file, filename)
for date_item in data.DATE_RELEVANT: if date_item in category_tuple[0]: date_list = f.check_date(title_text, regex_date) break # create a proper filename filename = f.construct_filename(date_list, facility_en_name, category_tuple[0], file_index) # set the path based on the filename filepath = "c:\\Users\\user\\Downloads\\%s" % filename # # for mac # # filepath = "/Users/apple/Downloads/%s" %filename # download link.click() time.sleep(1) f.download_file(driver, link, filename, filepath) file_index += 1 driver.back() time.sleep(1) print(filename) # # end of the 1st level loop================================================================================================================== # # end of the 2nd level loop========================================================================================================================================== # # back to the facility list page f.back_to_facility(driver) # give the user some time to manage the downloaded files before the download of the next category begins and the program terminates if "n" is pressed waiting = input("Start the next download? (y/n)") if waiting == "n": break
def _render_customized_chart(request, from_field, to_field, config_key, template_name):
    """Generate one customized chart page for a posted date range and render it.

    Reads the two date fields from ``request.POST``, converts ``-`` to ``_``,
    validates them as ``YYYY_MM_DD``, asks
    ``get_daily_increment_from_variables`` to write ``template_name`` and
    renders that template. Returns an error ``HttpResponse`` when a date is
    malformed or when generation raises ``IndexError``.
    """
    From_date = str(request.POST.get(from_field)).replace("-", "_")
    To_date = str(request.POST.get(to_field)).replace("-", "_")
    try:
        datetime.datetime.strptime(From_date, '%Y_%m_%d')
        datetime.datetime.strptime(To_date, '%Y_%m_%d')
    except ValueError:
        return HttpResponse("Incorrect data format, it should be YYYY-MM-DD")
    try:
        # file_name : name of the output file (output_path + file_name)
        # enable    : True = the file is created, False = it is not
        output_file_config = {
            config_key: {
                'file_name': template_name,
                'output_path': system_variables_path + Django_variables_path,
                'enable': True,
            }
        }
        get_daily_increment_from_variables(From_date, To_date, output_file_config)
        return render(request, template_name)
    except IndexError:
        return HttpResponse("No Data")


def index(request):
    """Serve the landing page and handle the forms posted from it.

    A POST is dispatched on the fields it carries, in this order:

    * ``email``: save a daily subscription; if saving does not return
      ``True`` the remaining checks still run.
    * ``source_data_from_date`` / ``source_data_to_date``: send the file
      produced by ``download_file`` for that range as an attachment named
      ``file.zip``.
    * ``increasement_from_date`` / ``increasement_to_date``: render the
      customized daily-increase chart.
    * ``themeRiver_from_date`` / ``themeRiver_to_date``: render the
      customized theme-river chart.

    Any other request renders ``index.html`` with ``days_of_cov_19`` in the
    context.
    """
    context = {}
    context['days_of_cov_19'] = days_of_cov_19()

    if request.method == 'POST':
        post = request.POST

        if post.get('email'):
            email = post.get('email')
            if subscription_save("None", email, "Daily") is True:
                return HttpResponse("Your Email address has been saved")

        if post.get('source_data_from_date') and post.get('source_data_to_date'):
            From_date = post.get('source_data_from_date')
            To_date = post.get('source_data_to_date')
            try:
                from_day = datetime.datetime.strptime(From_date, '%Y-%m-%d')
                to_day = datetime.datetime.strptime(To_date, '%Y-%m-%d')
            except ValueError:
                return HttpResponse("Incorrect data format, should be YYYY-MM-DD")
            # Compare the parsed dates, not the raw strings: strptime also
            # accepts non-zero-padded values such as 2020-9-1, which do not
            # sort correctly as text.
            if from_day >= to_day:
                return HttpResponse("Incorrect data Range, From > To")
            # The value returned by download_file is opened as a file path.
            archive_path = download_file(str(From_date), str(To_date))
            response = FileResponse(open(archive_path, 'rb'))
            response['Content-Type'] = 'application/octet-stream'
            response['Content-Disposition'] = 'attachment;filename=' + 'file.zip'
            return response

        if post.get('increasement_from_date') and post.get('increasement_to_date'):
            return _render_customized_chart(
                request,
                'increasement_from_date',
                'increasement_to_date',
                'covid_daily_increasement',
                "covid_daily_increasement_customized.html",
            )

        if post.get('themeRiver_from_date') and post.get('themeRiver_to_date'):
            return _render_customized_chart(
                request,
                'themeRiver_from_date',
                'themeRiver_to_date',
                'covid_daily_update_themeRiver',
                "covid_daily_update_themeRiver_customized.html",
            )

    return render(request, 'index.html', context)
# set the path based on the filename filepath = "c:\\Users\\user\\Downloads\\%s" % filename # for mac # filepath = "/Users/apple/Downloads/%s" %filename # if the file exists, log the message, back to the previous page and continue the loop if f.check_existing(filepath): # logging.debug("%s already exists.\n" %filename) file_index += 1 print("%s: already exists.\n" % filename) continue # otherwise, download the file else: # download f.click_link(link) f.download_file(driver, link, filename, filepath, file_index) # prepare for the next download f.prepare_next(driver, file_index) # end of the 1st level loop================================================================================================================== # end of the 2nd level loop========================================================================================================================================== # back to the facility list page f.back_to_facility(driver) # give the user some time to manage the downloaded files before the download of the next category begins and the program terminates if "n" is pressed waiting = input("Start the next download? (y/n)") if waiting == "n": break # end of the 3rd level loop=========================================================================================================================================================