def main():
    # Fetch the data from the web
    print('start: download_data')
    download_data(N_PERIOD)
    # Reshape the downloaded data into a long-format DataFrame and pickle it
    print('start: make_basedata')
    make_basedata()
def run_all():
    cleanup()
    download_data()
    unpack_data()
    crop_data()
    vectorize_data()
    create_dataframe()
    # cleanup()
    print("Done!")
def main():
    platform = get_platform()
    if platform == "Darwin":
        reload(sys)
        sys.setdefaultencoding('utf8')
    credential = get_timeplan_credentials(platform)
    download_data(credential[0], credential[1], credential[2])
    mashing()
    # credential = get_google_credentials()
    # upload_csv(credential[0], credential[1], credential[2])
    exit(0)
def main():
    start_time = time.time()
    if "download" in sys.argv:
        # download raw data from server
        raw_filenames = download_data(dates_to_download)
        print(
            "\n\nfinished downloading data, ending. please gzip the data, then run group, process, and clean."
        )
        return
    if "group" in sys.argv:
        # Group available data by day
        group_by_hour()
        print(
            "\n\nfinished grouping data, ending. Next, run process, and clean."
        )
        return
    if "process" in sys.argv:
        process_proximity()
        print('completed processing!')
    if "clean" in sys.argv:
        # clean up the data
        clean_up_data()
    if "analysis" in sys.argv:
        # create the analysis dataframes
        analyze_data()
    if "help" in sys.argv or len(sys.argv) == 1:
        print(
            "Please use arguments 'download', 'group', 'process', or 'clean'.")
    print("Total runtime: %s seconds" % (time.time() - start_time))
def turn_data_to_process():
    data = download_data()
    i_to_c = download_champion_shit()[0]
    participants = [x['participants'] for x in data]
    targets = [0 if x['teams'][0]['winner'] else 1 for x in data]
    pre_ret = []
    features = []
    arg_list = sorted(i_to_c.keys())
    num_champs = len(arg_list)
    # Map each participant's champion id to its index in the sorted champion list
    for p in participants:
        _list = []
        for p2 in p:
            _list.append(arg_find(int(p2['championId']), arg_list))
        pre_ret.append(_list)
    # Build one one-hot vector per team and concatenate them into a feature row
    for p in pre_ret:
        team_one_ids = p[:5]
        team_two_ids = p[5:]
        team_one = [0] * num_champs
        team_two = [0] * num_champs
        for champ in team_one_ids:
            team_one[champ - 1] = 1
        for champ in team_two_ids:
            team_two[champ - 1] = 1
        features.append(team_one + team_two)
    # return features, targets
    return BatchService(features, targets)
while True:
    ask = input(
        "Have you already specified paths to working directories in PATHS.txt?(y/n)\n"
    )
    if ask == "y":
        print("\nOk\n")
        break
    elif ask == "n":
        print(
            "\nPlease open PATHS.txt and specify the paths to the working directories there.\n"
        )
        exit()
    else:
        print("\nIncorrect input\n")
        continue

if not os.path.exists(path_to_databases):
    os.makedirs(path_to_databases)
if not os.path.exists(path_to_output_csv):
    os.makedirs(path_to_output_csv)

download.download_data(path_to_databases)
process.processing(path_to_databases, path_to_output_csv)
import_to_neo.import_data(neo4j_home_dir, database_name, path_to_output_csv)

finish = datetime.now()
elapsed = finish - start
print(datetime.now().time(), "finished")
print("time elapsed", elapsed)
def setUp(self):
    download_data(DATA_URL, FILE)
    self.football = FootballData(FILE)
file_with_days_stop = open("cfg_day_stop", "r")
line = file_with_days_stop.read(2)
day_stop = int(line)
file_with_days_stop.close()

day_start = day_stop - 1
# On the first day of a month, start from the last day of the previous month
if month in (8, 9, 11) and day_stop == 1:
    day_start = 31
if month in (10, 12) and day_stop == 1:
    day_start = 30

print("Month: ", month, "Day start: ", day_start, " time start: ", hour_start,
      ":", minute_start, "Day stop: ", day_stop, " time stop: ", hour_stop,
      ":", minute_stop)
print("Processing")
print(chamber, layer)

download.download_data(chamber, layer, year, month + 1, day_start, hour_start,
                       minute_start, year, month + 1, day_stop, hour_stop,
                       minute_stop)

statinfo_I = os.stat('getDataSafely_I')
statinfo_V = os.stat('getDataSafely_V')
print("The size of current file is: ", statinfo_I.st_size)
print("The size of voltage file is: ", statinfo_V.st_size)

# If either downloaded file is too small, there is no data for this interval
if statinfo_I.st_size < 280 or statinfo_V.st_size < 280:
    os.remove("getDataSafely_I")
    os.remove("getDataSafely_V")
    os.remove("getDataSafely_H")
    print("No data. Files removed")
else:
    spark_counter_181031.count_sparks()
    os.remove("getDataSafely_I")
    os.remove("getDataSafely_V")
    os.remove("getDataSafely_H")
import os
import shutil

from download import download_data
from run_models import run_models

print("Downloading Data")
download_data("../shared_config/data_sources.json", "../data/downloads")

print("Running Models")
run_models(
    "../shared_config/data_sources.json",
    "../data/downloads",
    "../data/forecasts",
)

print("Clearing Cache")
for filename in os.listdir("/nginx_cache"):
    filepath = os.path.join("/nginx_cache", filename)
    try:
        shutil.rmtree(filepath)
    except OSError:
        # Not a directory; remove the file instead
        os.remove(filepath)
        return self

    def find_team_with_min_goals_difference(self):
        '''
        Find the team with the minimum difference between goals scored for
        and against it.

        Returns:
            The team description, or None if the dataframe is empty.
        '''
        min_SpT = self._add_goals_difference(col_name='D')._min('D')
        if min_SpT is not None:
            print("Minimal difference in goals found in dataframe entry:")
            print(min_SpT)
            return min_SpT.name
        else:
            print(
                "Cannot find a team with minimal goal difference because of empty dataframe."
            )
            return None


if __name__ == "__main__":
    download_data(DATA_URL, FILE)
    football = FootballData(FILE)
    team = football.find_team_with_min_goals_difference()
    print(f"Minimal goal difference was achieved by team: {' '.join(team)}.")
    football.visualize()
import os
import time

import print_url_list as pul
import download as dl

url = 'http://mysql.taobao.org/monthly/'

# Load the full URL list from the cache file if it exists, otherwise scrape it
if os.path.exists('url_list.txt'):
    f = open('url_list.txt', 'r')
    f_str = f.read()
    f.close()
    url_list = f_str.split('\n')
else:
    url_list = pul.get_url_list(url)

# Load the list of URLs that have already been downloaded
if os.path.exists('downloaded_url_list.txt'):
    f = open('downloaded_url_list.txt')
    downloaded_url_list_str = f.read()
    f.close()
    downloaded_url_list = downloaded_url_list_str.split('\n')
else:
    downloaded_url_list = []

# Only download the URLs that have not been downloaded yet
download_url_list = list(set(url_list).difference(set(downloaded_url_list)))
print('there are ' + str(len(download_url_list)) + ' urls to download')
print('total ' + str(len(url_list)) + ' urls')
print(str(len(downloaded_url_list)) + ' urls downloaded')
print(downloaded_url_list)
time.sleep(3)
# os._exit()

for i, element in enumerate(download_url_list):
    dl.download_data(element)
    print('downloading ' + str(i + 1) + 'th url ' + element)
import os

import numpy as np

from data_exploration import (categorical_features_plot,
                              continuous_features_plot, get_data,
                              impute_missing_values, univariate_table)
from download import download_data
from evaluation import eval_metrics, extract_model_params, proba_hist, roc_plot
from train import one_hot_expand, split_and_normalise, train_model

OUTPUT_DIR = "outputs"

############
# Download #
############
print("Downloading data...")
path = download_data(".")
print(f"\nSaved to {path}")

###########################
# Preliminary exploration #
###########################
df = get_data(path)
if not os.path.isdir(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Making continuous features plot...")
fig = continuous_features_plot(df)
fname = os.path.join(OUTPUT_DIR, "continuous_features.png")
fig.savefig(fname)
print(f"Saved to {fname}")
def setUp(self):
    download_data(DATA_URL, FILE)
    self.weather = WeatherData(FILE)