def setUpClass(self):
    """Load the test data once and build the application summary.

    Stores the imported test data as ``df``, the imported operative test
    data as ``odf`` and the result of ``analysis.summarize_applications``
    as ``apps`` on the receiver, then prints the summary once as a
    debugging aid.

    NOTE(review): unittest only invokes ``setUpClass`` as a classmethod;
    the ``@classmethod`` decorator is presumably applied just above this
    block -- confirm.
    """
    # TODO: replace the relative "data/" path with something better.
    self.df = data_helper.import_data("data/" + _TEST_DATA_FILE)
    self.odf = data_helper.import_operative_data(
        "data/" + _OPERATIVE_TEST_DATA_FILE)
    self.apps = analysis.summarize_applications(self.df, self.odf)

    # Print apps once for debugging. Single-argument print(...) calls give
    # the same output under the Python 2 print statement and the Python 3
    # print function, so this no longer breaks on Python 3.
    print("Application summary based on test data:")
    print(self.apps)
def main():
    """Run the analysis end to end.

    Reads the file names from the parsed command-line arguments, configures
    logging, imports the usage data (and the operative data when a file name
    was given), logs the number of events and their time range, writes the
    user and application summaries and finally reports run statistics.

    The imported frames are published as the module globals ``df`` and
    ``odf``; ``odf`` is ``None`` when no operative data file was supplied.
    """
    # Exported to global scope for debugging purposes.
    global df, odf

    options = parse_args()
    usage_path = options['input_file_usage']
    operative_path = options['input_file_operative']
    applications_out = options['output_file_applications']
    users_out = options['output_file_users']
    # Still looked up although the prediction summary below is disabled.
    prediction_out = options['prediction_output_file']

    utils.log_config()
    log = logging.getLogger(__name__)

    log.info("Data file: {}".format(usage_path))
    if not operative_path:
        log.info("No operative data available.")
    else:
        log.info("Operative data file: {}".format(operative_path))

    started_at = datetime.datetime.now()

    df = data_helper.import_data(usage_path)
    odf = (data_helper.import_operative_data(operative_path)
           if operative_path else None)

    event_count = len(df)
    first_event = df['datetime'].min()
    last_event = df['datetime'].max()
    log.info("N of events: {}, from {} to {} ".format(
        event_count, first_event, last_event))

    create_user_summary(users_out)
    create_application_summary(applications_out)
    # create_prediction_summary(prediction_out)
    print_stats(started_at)
default=os.getcwd() + "summary-users.csv") args = vars(parser.parse_args()) return args if __name__ == "__main__": pd.set_option('display.width', 240) args = parse_args() input_file_operative = args['input_file_operative'] input_file_usage = args['input_file_usage'] output_file_applications = args['output_file_applications'] output_file_users = args['output_file_users'] analysis_start_time = datetime.datetime.now() odf = data_helper.import_operative_data(input_file_operative) udf = data_helper.import_usage_data(input_file_usage) print("Total number of apps: {}".format(len(odf))) print("Total number of events: {} with time range from {} to {} ".format( len(udf), udf['datetime'].min(), udf['datetime'].max())) application_summary = analyze.summarize_applications(odf, udf) application_summary.to_csv(output_file_applications, sep=';', encoding='utf-8') user_summary = analyze.summarize_users(odf, udf) user_summary.to_csv(output_file_users, sep=';', encoding='utf-8') print("Analysis took {} seconds".format(datetime.datetime.now() -
def setUpClass(self):
    """Import the test fixtures once and derive the user summary.

    Sets ``odf`` (operative test data), ``udf`` (usage test data) and
    ``users`` (the result of ``analyze.summarize_users``) on the receiver.
    """
    # Chained assignment stores the attribute first, then the local alias,
    # so the attribute is set before the next import runs.
    self.odf = operative = data_helper.import_operative_data(
        _OPERATIVE_TEST_DATA_FILE)
    self.udf = usage = data_helper.import_usage_data(_TEST_DATA_FILE)
    self.users = analyze.summarize_users(operative, usage)