Example no. 1
def maybe_download_and_extract(runs_dir):
    logdir = os.path.join(runs_dir, default_run)

    if not os.path.exists(logdir):
        # weights have not been downloaded yet: create logdir and fetch them
        print('mkdir')
        print(logdir)
        os.makedirs(logdir)

        import zipfile
        download_name = tv_utils.download(weights_url, runs_dir)
        logging.info("Extracting MultiNet_pretrained.zip")
        zipfile.ZipFile(download_name, 'r').extractall(runs_dir)

    # from download_data.py
    import download_data as d
    data_dir, run_dir = d.get_pathes()
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    vgg_weights = os.path.join(data_dir, 'vgg16.npy')

    # Download the VGG weights if they are not already present
    if not os.path.exists(vgg_weights):
        logging.info("Downloading VGG weights.")
        d.download(d.vgg_url, data_dir)
    else:
        logging.warning("File: {} exists.".format(vgg_weights))
        logging.warning("Please delete to redownload VGG weights.")

    return
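The snippet above depends on module-level names (default_run, weights_url, tv_utils, plus the standard os/logging imports) that the example page does not show. A minimal sketch of that assumed context, with placeholder values:

import os
import logging

default_run = "MultiNet_pretrained"  # placeholder run name, not from the source
weights_url = "https://example.com/MultiNet_pretrained.zip"  # placeholder URL

# tv_utils.download(url, dir) is assumed to fetch url into dir and
# return the local path of the downloaded file.

maybe_download_and_extract("RUNS")  # "RUNS" is a placeholder directory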
Example no. 2
    def download_from_meta(self, filename, subpart_data=None, n_jobs=3, chunk_size=10):
        """
        Download files contained in a meta file (tsv)

        Args:
            filename: str, path of the meta file (tsv with a "filename"
                column) listing the audio files to download
            subpart_data: int, number of files to use if only a subset of
                the data is wanted
            n_jobs: int, (default 3) number of parallel download jobs
            chunk_size: int, (default 10) number of files to download per chunk
        """
        result_audio_directory = self.get_audio_dir_path_from_meta(filename)
        # read the metadata file and keep each filename only once
        df = DatasetDcase2019Task4.get_df_from_meta(filename, subpart_data)
        filenames = df.filename.drop_duplicates()
        download(filenames, result_audio_directory, n_jobs=n_jobs, chunk_size=chunk_size)
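A hypothetical call, assuming dataset is an already-initialized DatasetDcase2019Task4 instance; the tsv path and file count are placeholders:

# Download at most 100 of the files listed in the weak training meta file.
dataset.download_from_meta("metadata/train/weak.tsv", subpart_data=100)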
Example no. 3
def plot_gdd(stationid):

    Citi_name, temp = DW.download(stationid, 2014)

    gdd1 = GD.gdd_cal_accum(stationid, 2015)
    gdd2 = GD.gdd_cal_accum(stationid, 2014)
    gdd3 = GD.gdd_cal_accum(stationid, 2013)
    gdd4 = GD.gdd_cal_accum(stationid, 2016)

    figure_name = "Bokeh_GDD-Accum_{}.html".format(stationid)
    bok_title="GDD PLOT Accum {}".format(Citi_name)

    plot=plotting.figure(title=bok_title)
    plot.xaxis.axis_label = 'Year/Time'
    plot.yaxis.axis_label = 'Accumlated GDD'

    x_data=np.linspace(1,13,12)

    plot.line(x_data, gdd1, legend="2015", line_color="red")

    plot.line(x_data, gdd2, legend="2014", line_color="blue")
    plot.circle(x_data, gdd2, legend="2014", line_color="blue")

    plot.line(x_data, gdd3, legend="2013", line_color="green")
    plot.triangle(x_data, gdd3, legend="2013", line_color="green")

    plot.line(x_data, gdd4, legend="2016", line_color="orange")

    plotting.output_file(os.path.dirname(os.path.realpath(__file__)) + "/../plots/"+ figure_name, title=bok_title)
    plotting.show(plot)
Example no. 4
def plot_min_max(stationid, year):

    # call the download function and get the data table and city name
    Citi_name, df_rename = DW.download(stationid, year)

    y = df_rename['Max Temp (°C)']
    y2 = df_rename['Min Temp (°C)']

    #creating Figure name
    figure_name = "Bokeh_Min-Max_{}.html".format(stationid)
    bok_title = "Bokeh_Min-Max plot {}".format(Citi_name)

    print(Citi_name)

    plot1 = plotting.figure(title=bok_title)
    plot1.xaxis.axis_label = 'Days'
    plot1.yaxis.axis_label = 'Temperature'

    x_data = np.linspace(0, 365, 365)

    plot1.line(x_data, y, legend="Max Temperature", line_color="red")
    plot1.line(x_data, y2, legend="Min Temperature", line_color="blue")
    plot1.circle(x_data, y2, line_color="blue")

    plotting.output_file(os.path.dirname(os.path.realpath(__file__)) +
                         "/../plots/" + figure_name,
                         title=bok_title)
    plotting.show(plot1)
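A hypothetical call; the station id and year are placeholders, and valid ids depend on whatever data source DW.download wraps:

plot_min_max(51442, 2016)  # placeholder station id and year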
Example no. 5
def plot_min_max(stationid, year):

    # call the download function and get the data table and city name
    Citi_name, df_rename = DW.download(stationid, year)


    fig = plt.figure(num=1, figsize=(15, 6))
    x = np.linspace(0, 365, 365)

    y = df_rename['Max Temp (°C)']
    y2 = df_rename['Min Temp (°C)']

    #creating Figure name
    figure_name = "Fig_{}_{}.png".format(stationid, year)

    plt.plot(x, y, label="Max Temperature")
    plt.plot(x, y2, label="Min Temperature")

    plt.legend(bbox_to_anchor=(1, 1), loc=1)
    plt.xticks(np.arange(0, 365, 10))
    plt.title(Citi_name, color="red", size=15)
    plt.xlabel('Days', color="green")
    plt.ylabel('Temperature', color="green")

    fig.savefig("../plots/"+figure_name) #for saving figure
    plt.show()
Example no. 6
def gdd_cal_accum(stationid, year):

    Citi_name, df_rename = DW.download(stationid, year)

    gdd = []
    # cumulative GDD from day 0 up to day i, sampled every 31 days (12 points)
    for i in range(1, 365, 31):
        gdd.append(gdd_tot(df_rename['Mean Temp (°C)'][0:i]))

    return gdd
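These GDD examples all rely on a gdd_tot helper that the page does not show. A plausible definition, assuming the usual growing-degree-day formula; the base temperature of 10 °C is an assumption, not from the source:

def gdd_tot(mean_temps, base_temp=10.0):
    # Sum the daily mean temperature excess over the base temperature,
    # skipping NaN days (t == t is False for NaN).
    return sum(max(t - base_temp, 0.0) for t in mean_temps if t == t)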
Example no. 7
def main():
    names = list(config.DATA_NAMES.keys())
    download_data.clearDir(names)
    download_data.download(names)

    for name, keys in config.DATA_NAMES.items():
        if name in config.TUSHARE_DATA_NAME:
            continue
        # filterSecID is disabled only for these two dataset names
        flt = name not in ('MktFunddAdjGet', 'FundETFConsGet')
        print(name)
        dataio.updatedb(name, keys, filterSecID=flt)

    process_raw_data.main()
    utils_io.runExtract('extractors', None)
Example no. 8
def main():
    logging.basicConfig(format=LOGGING_FORMAT)
    logger = logging.getLogger("PED")
    args = parse_args()

    logger.info(f"input config: {args}")

    if args.drive_folder_id:
        download_data.download(args.drive_folder_id)

    # Integrate data
    df = preprocess.integrate_data(FILENAMES_TO_INTEGRATE)

    # Fill missing values
    df = preprocess.fill_missing_values(df)

    # TODO column transformation + additional features

    # TODO grouping by video_id ?

    # TODO predict
    pass
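The snippet assumes a parse_args helper and module constants (LOGGING_FORMAT, FILENAMES_TO_INTEGRATE) defined elsewhere. A minimal sketch of parse_args covering only the flag main uses; the flag name is inferred from args.drive_folder_id and not confirmed by the source:

import argparse

def parse_args():
    parser = argparse.ArgumentParser(description="PED pipeline")
    parser.add_argument("--drive-folder-id", dest="drive_folder_id",
                        default=None, help="Google Drive folder to download")
    return parser.parse_args()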
Example no. 9
def plot_gdd(stationid, year):

    Citi_name, df_rename = DW.download(stationid, year)

    gdd = GD.gdd_cal_accum(stationid, year)
    figure_name = "Fig_GDD_{}.png".format(stationid)

    x = np.linspace(1, 13, 12)
    fig = plt.figure(num=1, figsize=(10, 6))
    plt.title("accumulated GDD " + Citi_name)
    plt.plot(x, gdd, label=year)
    plt.xlabel("Year/Time")
    plt.ylabel("accumulated GDD")
    plt.legend(bbox_to_anchor=(1, 1), loc=1)

    fig.savefig("../plots/" + figure_name)  #for saving figure
Example no. 10
def gdd_cal(stationid, year):

    Citi_name, df_rename = DW.download(stationid, year)

    gdd = []

    # monthly GDD in 30-day blocks; the last block runs to day 364
    for start in range(0, 330, 30):
        gdd.append(gdd_tot(df_rename['Mean Temp (°C)'][start:start + 30]))
    gdd.append(gdd_tot(df_rename['Mean Temp (°C)'][330:364]))

    return gdd
Example no. 11
logger = logging.getLogger("CourseHandler")

if __name__ == "__main__":
    logger.info("Welcome to '%s' by %s", "WorkShop BDA", "Pascal Fares")
    logger.info("Course Version: %s", "0.0")
    logger.info("Course Name: %s",
                "Mastering Big Data Analytics with PySpark [Machine Learning & Data Mining Workshop]")

    logger.info("1. Step 1 : Install Java ")
    subprocess.check_call(['java','--version'])
    logger.info("Installing spark, needed packages and pyspark")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", '-r', 'requirement_me.txt'])
    logger.info("And Downloading the data")
    download_spark()
    
    download()

    # Set up the course environment and launch pyspark under jupyter lab
    logger.info("Setting up the course environment and launching pyspark with jupyter lab")
    HOME_PATH = Path(os.environ['HOME'])
    REPO_PATH = HOME_PATH / 'DataMiningSpark'
    BASE_DIR = REPO_PATH
    spark_home = 'sparkhome/spark-3.0.1-bin-hadoop2.7'
    data_examples = spark_home + "/data"
    data_sets = 'data-sets'
    os.environ['SPARK_LOCAL_IP'] = '127.0.0.1'
    os.environ['SPARK_HOME'] = str(BASE_DIR / spark_home)
    os.environ['DATA_SETS'] = str(BASE_DIR / data_sets)
    os.environ['DATA_EXAMPLES'] = str(BASE_DIR / data_examples)
    print(os.environ['SPARK_HOME'])
    os.environ['PYSPARK_DRIVER_PYTHON'] = 'jupyter'
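The snippet configures the environment but stops before actually launching anything. A plausible final step, assuming the standard pyspark launcher script and the usual jupyter driver options; this launch line is an assumption, not shown in the source:

# Ask jupyter to start in lab mode, then run the pyspark launcher shipped
# with the Spark distribution configured above.
os.environ['PYSPARK_DRIVER_PYTHON_OPTS'] = 'lab'
subprocess.check_call([os.path.join(os.environ['SPARK_HOME'], 'bin', 'pyspark')])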
Example no. 12
from datetime import datetime
from flask import Flask, request, session
from flask_googlemaps import GoogleMaps
import logist
import recommend
import download_data
import requests
import random
import json

app = Flask(__name__)
app.debug = True
app.config['SECRET_KEY'] = 'secret_key'
app.config['GOOGLEMAPS_KEY'] = "AIzaSyB0_0YF6CqoeTD5EpRBYBLPoeMYRXvjfk8"
GoogleMaps(app)
recom = recommend.Recommender()
logist = logist.Logistc()
down = download_data.download()

# firebase = firebase.FirebaseApplication('https://foodie-yelp.firebaseio.com', None)
firebase_ = down.firebase1


@app.route('/login', methods=['GET', 'POST'])
def login():
    if request.method == 'POST':
        username = request.form['username']
        password = request.form['password']
        users = firebase_.get('/users', None)
        if (username in users
                and password == ''.join(users[username]['password'].values())):
            session['logged_in'] = True
            session['username'] = username
Example no. 13
from os import listdir, remove
from os.path import isfile, join
from download_data import download
import glob
import sys

dataDir = "data"
csvFiles = [f for f in listdir(dataDir) if isfile(join(dataDir, f))]
tickers = [f.split("_")[0] for f in csvFiles]
tickers = list(set(tickers))  # unique tickers

for ticker in tickers:
    fileList = glob.glob(join(dataDir, ticker + '*.csv'))
    for filePath in fileList:
        try:
            remove(filePath)
        except OSError:
            print("Error while deleting file: ", filePath, ". :",
                  sys.exc_info())
    # re-download once per ticker, after its old csv files are removed
    download(ticker)