/
main.py
81 lines (52 loc) · 2.64 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import sys
import pandas as pd
from upload_all_new import upload_all_new_azure
from download_csvs import download_csvs
from combine_csvs import combine_csvs
from my_zip import create_zip_from_file
from my_website import generate_website_and_upload_azure, upload_log
import logging
import sys
from create_diffs import create_difference
# Load KEY=VALUE pairs from a local '.env' file (when one exists in the
# current working directory) into os.environ. Values from the file overwrite
# any variable of the same name that is already set.
if os.path.exists('.env'):
    # A context manager guarantees the handle is closed; iterating a bare
    # open() call left that to the garbage collector.
    with open('.env') as env_file:
        for line in env_file:
            entry = line.strip()
            # Blank lines and '#' comments carry no assignment.
            if not entry or entry.startswith('#'):
                continue
            # Split on the first '=' only, so values may contain '='.
            key, separator, value = entry.partition('=')
            # Ignore lines without '=' and lines with an empty key: an empty
            # name is rejected by os.environ and would abort the script.
            if separator and key:
                os.environ[key] = value
# Send every record (DEBUG and above) to "log.log" inside LOGS_DIR, appending
# to the file rather than truncating it.
log_file_path = os.path.join(os.environ.get('LOGS_DIR'), "log.log")
logging.basicConfig(
    filename=log_file_path,
    filemode='a',
    level=logging.DEBUG,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
)
logger = logging.getLogger(__name__)

# Raise the threshold of the "requests" and "azure" loggers to WARNING.
for library_logger_name in ("requests", "azure"):
    logging.getLogger(library_logger_name).setLevel(logging.WARNING)

# Start-of-run banner: separator, then the working directory and interpreter.
for banner_line in (
    "----------------------------",
    "starting script",
    "current working directory is {}".format(os.getcwd()),
    "python executable is {}".format(sys.executable),
):
    logger.info(banner_line)

# Name of the Azure container used by the upload steps further down.
azure_container = os.environ.get('CONTAINER_NAME')
container_message = "uploading to azure container {}".format(azure_container)
logger.info(container_message)
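# TODO: first decide whether to run any code at all.
# Intended rule: if a file uploaded on the website has a date within the past 7 days, do nothing;
# otherwise run all of the code below.
# NOTE(review): no such check is visible in this script - every step below currently runs unconditionally.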
# Download the individual CSVs into INDIVIDUAL_LA_CSV_DIR.
download_message = "started - downloading csvs to {}".format(
    os.environ.get('INDIVIDUAL_LA_CSV_DIR')
)
logger.info(download_message)
download_csvs(os.environ.get('INDIVIDUAL_LA_CSV_DIR'))

# Combine them, then derive a difference file from the combined result.
combined_csv_path = combine_csvs(
    os.environ.get('INDIVIDUAL_LA_CSV_DIR'),
    os.environ.get('COMBINED_LA_CSV_DIR'),
)
difference_csv_path = create_difference(combined_csv_path)

# The combined CSV is always zipped, with FOR_UPLOAD_DIR as the second argument.
create_zip_from_file(combined_csv_path, os.environ.get('FOR_UPLOAD_DIR'))

# The difference CSV is zipped only when create_difference returned a truthy value.
if difference_csv_path:
    for debug_message in (
        "creating difference",
        combined_csv_path,
        difference_csv_path,
    ):
        logger.debug(debug_message)
    create_zip_from_file(difference_csv_path, os.environ.get('FOR_UPLOAD_DIR'))
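# Now upload any files in 'for_upload' to the Azure container that aren't already there.
# NOTE(review): this comment originally described the files being transferred from EB to S3 ("this should be super fast - like 100MB/s"); the call below targets Azure, so confirm whether that estimate still applies.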
# Hand FOR_UPLOAD_DIR, the container name and the account credentials to the
# Azure uploader.
upload_message = "uploading files to azure from {}".format(
    os.environ.get('FOR_UPLOAD_DIR')
)
logger.info(upload_message)
upload_all_new_azure(
    os.environ.get('FOR_UPLOAD_DIR'),
    azure_container,
    os.environ.get('ACC_NAME'),
    os.environ.get('ACCESS_KEY'),
)

# Generate the small web page containing the links and upload it.
website_message = "generating website {}".format(
    os.environ.get('FOR_UPLOAD_DIR')
)
logger.info(website_message)
generate_website_and_upload_azure(azure_container, "web")
logger.info("successfully completed whole script")

# Last step: upload the log.
upload_log()