def lambda_handler(event, context):
    """Reduce step: merge the word counts produced by every mapper.

    The first run of ASCII digits found in the S3 object key of the
    triggering record is read as the number of mappers ``n``. For every
    ``i`` in ``range(n)`` the file ``MapResult<i>.shlf`` is fetched with
    ``download_file`` (presumably into ``/tmp``, since the shelf is opened
    from there -- confirm against that helper), its ``MapResult<i>`` entry
    is read and all entries are summed key by key. The merged dictionary is
    stored under the key ``Reduce`` in ``/tmp/Reduce.shlf`` and handed to
    ``upload_file``.

    With zero mappers an empty result is written instead of failing.

    Args:
        event: S3 notification; only
            ``event['Records'][0]['s3']['object']['key']`` is read.
        context: Lambda context object (unused).

    Returns:
        The string ``'Reducer done!'``.

    Raises:
        ValueError: if the object key contains no digit.
    """
    # Local import: this block cannot rely on the module importing ``re``.
    import re

    arxiu = 'MapResult'
    bucket_name = "mapreducerenricalvaro"

    # Number of mappers. The whole run of digits is used so that keys such
    # as "...12..." mean 12 mappers rather than 1.
    key = event['Records'][0]['s3']['object']['key']
    match = re.search('[0-9]+', key)
    if match is None:
        raise ValueError("no mapper count found in S3 key %r" % (key,))
    n = int(match.group())

    # Download every mapper result and load its dictionary.
    dictionaries = []
    for i in range(n):
        nom = arxiu + str(i)
        download_file(nom + ".shlf", bucket_name)
        shelf = shelve.open("/tmp/" + nom + '.shlf')
        try:
            dictionaries.append(shelf[nom])
        finally:
            # The value read above is an independent copy, so the shelf can
            # be released right away.
            shelf.close()

    time1 = time.time()
    # Reduce function: accumulate all partial counts in place.
    totals = Counter()
    for partial in dictionaries:
        totals.update(dict(partial))
    reduct = dict(totals)
    print("Temps Reducer" + str(time.time() - time1))

    # Write the serialized result to disk.
    arxiu_pujar = 'Reduce'
    shelf = shelve.open("/tmp/" + arxiu_pujar + ".shlf")
    try:
        shelf[arxiu_pujar] = reduct
    finally:
        shelf.close()

    # Upload the result file.
    upload_file(arxiu_pujar + ".shlf", bucket_name)

    return 'Reducer done!'
def lambda_handler(event, context):
    """Map step: count the words of one input chunk.

    The first run of ASCII digits found in the S3 object key of the
    triggering record is taken as the mapper number ``n``. ``Map<n>.txt`` is
    fetched with ``download_file`` (presumably into ``/tmp``, since the file
    is read from there -- confirm against that helper). Every character
    other than a space, an ASCII letter or an ASCII digit is replaced by a
    space, the text is split on whitespace and the occurrences of each word
    are counted. The counts are stored under the key ``MapResult<n>`` in
    ``/tmp/MapResult<n>.shlf`` and handed to ``upload_file``.

    Args:
        event: S3 notification; only
            ``event['Records'][0]['s3']['object']['key']`` is read.
        context: Lambda context object (unused).

    Returns:
        The string ``'Mapper <n> done!'``.

    Raises:
        ValueError: if the object key contains no digit.
    """
    # Mapper number. The whole run of digits is used so that "Map12.txt" is
    # mapper 12 rather than mapper 1.
    key = event['Records'][0]['s3']['object']['key']
    match = re.search('[0-9]+', key)
    if match is None:
        raise ValueError("no mapper number found in S3 key %r" % (key,))
    n = match.group()

    arxiu = "Map" + n
    bucket_name = "mapreducerenricalvaro"

    # Download the input file and read it, closing the handle afterwards.
    download_file(arxiu + ".txt", bucket_name)
    with open("/tmp/" + arxiu + ".txt", 'r') as fitxer:
        text = fitxer.read()
    paraules = re.sub('[^ a-zA-Z0-9]', ' ', text).split()
    result = {}

    # Only the counting loop is timed, not the download or tokenizing.
    time1 = time.time()

    for paraula in paraules:
        result[paraula] = result.get(paraula, 0) + 1

    print("TEMPS MAP" + n + ": " + str(time.time() - time1))

    # Write the serialized result to disk.
    arxiu_pujar = 'MapResult' + n
    shelf = shelve.open("/tmp/" + arxiu_pujar + ".shlf")
    try:
        shelf[arxiu_pujar] = result
    finally:
        shelf.close()

    # Upload the result file.
    upload_file(arxiu_pujar + ".shlf", bucket_name)
    return 'Mapper ' + n + ' done!'
Example #3
0
def charge_from_ODS(workspace, coverage, base_format='GTFS'):
    """Download the file listed for *coverage* on the Navitia ODS portal.

    The catalog records of the dataset named *coverage* are queried with
    ``<coverage>-<base_format>``; the download id of the first record
    returned is then used to build the file URL, which is passed to
    ``download_file`` together with the target ``<workspace><coverage>.zip``.

    Both the catalog answer and the selected id are printed.

    Args:
        workspace: prefix of the destination path. It is concatenated as is,
            so it must already end with a path separator.
        coverage: dataset name on the portal.
        base_format: suffix of the search query (default ``'GTFS'``).
    """
    ODS_platform = 'https://navitia.opendatasoft.com'

    # Step 1: look the dataset up to learn which file has to be fetched.
    query = coverage + '-' + base_format
    catalog_url = (ODS_platform + '/api/v2/catalog/datasets/' + coverage
                   + '/records?q=' + query)
    data = charge_json(catalog_url)
    print(data)
    first_record = data['records'][0]['record']
    id_to_download = first_record['fields']['download']['id']
    print(id_to_download)

    # Step 2: fetch that file.
    url = (ODS_platform + '/explore/dataset/' + coverage + '/files/'
           + id_to_download + '/download/')
    destination = workspace + coverage + '.zip'
    download_file(url, destination)
def download_VG_many (cart_id, map_name, storing_name, number):
    """Download the cartoons of the last *number* days, newest first.

    Starting with yesterday and going back one day at a time, the picture
    ``http://www.heltnormalt.no/img/<cart_id><YYYY>/<MM>/<DD>.jpg`` is
    passed to ``functions.download_file`` together with the target name
    ``<directory><storing_name>_<YYYY-MM-DD>_VG.jpg``. The loop stops early
    as soon as ``functions.download_file`` returns ``0`` (presumably its
    failure signal -- confirm against that helper).

    Args:
        cart_id: path fragment inserted right after ``/img/``.
        map_name: appended to ``../Downloaded_cartoons/`` to form the target
            directory. The file name is appended by plain concatenation, so
            this should end with a path separator.
        storing_name: prefix of every stored file name.
        number: maximum number of days / pictures to fetch.
    """
    # Specify where to store the files.
    directory = "../Downloaded_cartoons/" + map_name

    # Check that the directory exists and create it if it does not.
    functions.make_dir(directory)

    # Counted explicitly: the loop index is one short when every download
    # succeeds and is undefined when ``number`` is 0.
    downloaded = 0
    today = date.today()

    for days_back in range(1, number + 1):
        # Build the URL for that day.
        date_ = str(today - timedelta(days_back))
        year, month, day = date_.split('-')
        picture_url = ''.join(["http://www.heltnormalt.no/img/", cart_id,
                               year, "/", month, "/", day, ".jpg"])

        # Build the dated file name.
        tag = ''.join([directory, storing_name, "_", date_, "_VG", ".jpg"])

        # Download the picture; stop at the first one that cannot be fetched.
        if functions.download_file(picture_url, tag) == 0:
            break
        downloaded += 1

        if downloaded % 10 == 0:
            print ("finished with picture # " , downloaded)

    print ("*** Finished " + storing_name + ". Downloaded " + str(downloaded) + " pictures")
    return
Example #5
0
def main(argv):
    """Run one S3 operation selected on the command line.

    Recognized arguments are ``-h`` (print usage and exit) and
    ``-o <option>`` / ``--option <option>`` where ``<option>`` is one of
    ``create_bucket``, ``upload_file``, ``delete_file``, ``download_file``
    or ``get_files``. The file operations act on ``boto3.jpg`` in the
    bucket ``opensouthtest``.

    Credentials are read from a ``Keys`` instance and the client is created
    for region ``eu-west-1``.

    Args:
        argv: command-line arguments without the program name.

    Raises:
        SystemExit: with status 2 when the arguments cannot be parsed or no
            valid option is given; with the default status after ``-h``.
    """
    keys = Keys()
    _access_key = keys._access_key
    _secret_access_key = keys._secret_access_key
    _region = 'eu-west-1'

    valid_options = ('create_bucket', 'upload_file', 'delete_file',
                     'download_file', 'get_files')

    try:
        # A getopt long option is just its name followed by "=" when it
        # takes a value; anything else makes "--option" unusable.
        opts, args = getopt.getopt(argv, "hi:o:", ["option="])
    except getopt.GetoptError:
        print('manace_ec2.py -o <option> ')
        sys.exit(2)

    option = None
    for opt, arg in opts:
        if opt == '-h':
            print('test.py -o <option> ')
            sys.exit()
        elif opt in ("-o", "--option"):
            option = arg

    # Fail with a usage message instead of a NameError (no option given) or
    # a silent no-op (unknown option).
    if option not in valid_options:
        print('manace_ec2.py -o <option> ')
        print('valid options: ' + '|'.join(valid_options))
        sys.exit(2)

    # Created once, after the arguments are known to be valid.
    s3_client = boto3.client('s3',
                             aws_access_key_id=_access_key,
                             aws_secret_access_key=_secret_access_key,
                             region_name=_region)

    if option == 'create_bucket':
        f.create_bucket(s3_client)
    elif option == 'upload_file':
        f.upload_file(s3_client=s3_client,
                      file='boto3.jpg',
                      bucket='opensouthtest')
    elif option == 'delete_file':
        f.delete_file(s3_client=s3_client,
                      file='boto3.jpg',
                      bucket='opensouthtest')
    elif option == 'download_file':
        f.download_file(s3_client=s3_client,
                        file='boto3.jpg',
                        bucket='opensouthtest')
    elif option == 'get_files':
        f.get_files(s3_client=s3_client, bucket='opensouthtest')
def download_VG (cart_id, storing_name):
    """Download today's cartoon and store it under a dated file name.

    The picture ``http://www.heltnormalt.no/img/<cart_id><YYYY>/<MM>/<DD>.jpg``
    is passed to ``functions.download_file`` together with the target name
    ``<directory><storing_name><YYYY-MM-DD>_VG.jpg``, where ``directory`` is
    a module-level name. The target name is printed afterwards.

    Args:
        cart_id: path fragment inserted right after ``/img/``.
        storing_name: prefix of the stored file name.
    """
    today = str(datetime.date.today())
    year, month, day = today.split('-')

    # Address of today's picture.
    picture_url = ("http://www.heltnormalt.no/img/" + cart_id + year + "/"
                   + month + "/" + day + ".jpg")

    # Dated name of the local copy.
    tag = directory + storing_name + today + "_VG" + ".jpg"

    functions.download_file(picture_url, tag)

    print ("Finished " + tag)

    return
def download_DB (url, cart_id, storing_name):
    """Download the picture embedded in a web page under a dated file name.

    The page at *url* is fetched and parsed with ``BS``; the ``src``
    attribute of the element whose id is *cart_id* is passed to
    ``functions.download_file`` together with the target name
    ``<directory><storing_name><YYYY-MM-DD>_DB.jpg``, where ``directory`` is
    a module-level name. The target name is printed afterwards.

    Args:
        url: address of the page that contains the picture.
        cart_id: id attribute of the picture element.
        storing_name: prefix of the stored file name.

    Raises:
        urllib.error.URLError: if the page cannot be fetched (HTTP error
            statuses are reported as its subclass ``HTTPError``).
        ValueError: if the page has no element with id *cart_id*.
    """
    # Download the HTML of the page. FancyURLopener is a legacy interface
    # (deprecated since Python 3.3, removed in 3.14); an opener built with an
    # empty ProxyHandler keeps the original "use no proxies" behaviour.
    opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
    with opener.open(url) as response:
        html_code = response.read()
    soup = BS(html_code)

    # Find the URL of the right picture.
    picture = soup.find(id=cart_id)
    if picture is None:
        raise ValueError("no element with id %r found at %s" % (cart_id, url))
    picture_url = picture['src']

    # Build the dated file name.
    today = str(datetime.date.today())
    string_array = [directory,storing_name,today,"_DB",".jpg"]
    tag = ''.join(string_array)

    # Download the picture.
    functions.download_file(picture_url, tag)

    print ("Finished " + tag)

    return
Example #8
0
import functions
import os
# Create the style, javascript, images and other directories if they are
# missing.
for _folder in ("style", "javascript", "images", "other"):
    if not os.path.isdir(_folder):
        os.mkdir(_folder)

# Ask which file has to be fetched.
url = raw_input("Enter the url of the file you wish to download and process\n")

# Fetch it; the helper's return value is used as the file name below.
filename = functions.download_file(url)

# Process the fetched file, handing over the download helper itself
# (presumably so the links found in the file can be fetched too -- confirm
# against functions.get_fileslinks).
functions.get_fileslinks(functions.download_file, filename)
Example #9
0
                for date_item in data.DATE_RELEVANT:
                    if date_item in category_tuple[0]:
                        date_list = f.check_date(title_text, regex_date)
                        break
                # create a proper filename
                filename = f.construct_filename(date_list, facility_en_name,
                                                category_tuple[0], file_index)
                # set the path based on the filename
                filepath = "c:\\Users\\user\\Downloads\\%s" % filename

                # 			# for mac
                # 			# filepath = "/Users/apple/Downloads/%s" %filename
                # download
                link.click()
                time.sleep(1)
                f.download_file(driver, link, filename, filepath)
                file_index += 1
                driver.back()
                time.sleep(1)
                print(filename)

    # 		# end of the 1st level loop==================================================================================================================

    # # end of the 2nd level loop==========================================================================================================================================

    # # back to the facility list page
    f.back_to_facility(driver)
    # give the user some time to manage the downloaded files before the download of the next category begins and the program terminates if "n" is pressed
    waiting = input("Start the next download? (y/n)")
    if waiting == "n":
        break
Example #10
0
def _render_customized_chart(request, from_field, to_field, config_key,
                             template_name):
    """Generate one customized chart for a posted date range and render it.

    The two posted dates have ``-`` replaced by ``_`` and are checked
    against the ``%Y_%m_%d`` format. They are then passed to
    ``get_daily_increment_from_variables`` with a configuration that
    enables a single output file named *template_name*, and that template
    is rendered.

    Returns an ``HttpResponse`` with an error text when a date is malformed
    or when the generation raises ``IndexError``.
    """
    From_date = str(request.POST.get(from_field)).replace("-", "_")
    To_date = str(request.POST.get(to_field)).replace("-", "_")

    try:
        datetime.datetime.strptime(From_date, '%Y_%m_%d')
        datetime.datetime.strptime(To_date, '%Y_%m_%d')
    except ValueError:
        return HttpResponse("Incorrect data format, it should be YYYY-MM-DD")

    try:
        # file_name: name of the output file (output_path + file_name)
        # enable: True = the file is created, False = it is not
        output_file_config = {
            config_key: {
                'file_name': template_name,
                'output_path': system_variables_path + Django_variables_path,
                'enable': True,
            }
        }

        get_daily_increment_from_variables(From_date, To_date,
                                           output_file_config)
        return render(request, template_name)

    except IndexError:
        return HttpResponse("No Data")


def index(request):
    """Serve the landing page and handle the forms posted from it.

    A POST request is dispatched on the fields it carries, in this order:

    * ``email``: saved as a daily subscription; a confirmation is returned
      when ``subscription_save`` returns ``True``, otherwise processing
      continues with the next checks.
    * ``source_data_from_date`` / ``source_data_to_date``: the path
      returned by ``download_file`` for that range is sent back as the
      attachment ``file.zip``.
    * ``increasement_from_date`` / ``increasement_to_date``: the customized
      daily-increase chart is generated and rendered.
    * ``themeRiver_from_date`` / ``themeRiver_to_date``: the customized
      theme-river chart is generated and rendered.

    Any other request renders ``index.html`` with ``days_of_cov_19`` in the
    context.
    """
    context = {}
    context['days_of_cov_19'] = days_of_cov_19()

    if request.method != 'POST':
        return render(request, 'index.html', context)

    post = request.POST

    email = post.get('email')
    if email and subscription_save("None", email, "Daily") is True:
        return HttpResponse("Your Email address has been saved")

    From_date = post.get('source_data_from_date')
    To_date = post.get('source_data_to_date')
    if From_date and To_date:
        try:
            from_day = datetime.datetime.strptime(From_date, '%Y-%m-%d')
            to_day = datetime.datetime.strptime(To_date, '%Y-%m-%d')
        except ValueError:
            return HttpResponse("Incorrect data format, should be YYYY-MM-DD")

        # Compare the parsed dates, not the raw strings: strptime also
        # accepts values that are not zero-padded (e.g. "2020-9-30"), for
        # which a string comparison gives the wrong order.
        if from_day >= to_day:
            return HttpResponse("Incorrect data Range, From > To")

        archive_path = download_file(str(From_date), str(To_date))

        response = FileResponse(open(archive_path, 'rb'))
        response['Content-Type'] = 'application/octet-stream'
        response['Content-Disposition'] = 'attachment;filename=' + 'file.zip'

        return response

    if post.get('increasement_from_date') and post.get('increasement_to_date'):
        return _render_customized_chart(
            request,
            'increasement_from_date',
            'increasement_to_date',
            'covid_daily_increasement',
            "covid_daily_increasement_customized.html")

    if post.get('themeRiver_from_date') and post.get('themeRiver_to_date'):
        return _render_customized_chart(
            request,
            'themeRiver_from_date',
            'themeRiver_to_date',
            'covid_daily_update_themeRiver',
            "covid_daily_update_themeRiver_customized.html")

    return render(request, 'index.html', context)
                # set the path based on the filename
                filepath = "c:\\Users\\user\\Downloads\\%s" % filename
                # for mac
                # filepath = "/Users/apple/Downloads/%s" %filename

                # if the file exists, log the message, back to the previous page and continue the loop
                if f.check_existing(filepath):
                    # logging.debug("%s already exists.\n" %filename)
                    file_index += 1
                    print("%s: already exists.\n" % filename)
                    continue
                # otherwise, download the file
                else:
                    # download
                    f.click_link(link)
                    f.download_file(driver, link, filename, filepath,
                                    file_index)
                    # prepare for the next download
                    f.prepare_next(driver, file_index)

            # end of the 1st level loop==================================================================================================================

    # end of the 2nd level loop==========================================================================================================================================

    # back to the facility list page
    f.back_to_facility(driver)
    # give the user some time to manage the downloaded files before the download of the next category begins and the program terminates if "n" is pressed
    waiting = input("Start the next download? (y/n)")
    if waiting == "n":
        break
# end of the 3rd level loop=========================================================================================================================================================