def do_tax(salary, gongjijin_rate=0.07):
    """Compute monthly take-home pay.

    Applies the social-insurance/housing-fund deduction first, then the
    income-tax deduction, logging the intermediate values.
    """
    post_insurance = after_insurance(salary, gongjijin_rate=gongjijin_rate)
    post_tax = after_tax(post_insurance)
    Logger.log(
        'salary {} after_insurance {} after tax {}'.format(
            salary, post_insurance, post_tax),
        tag_name=LogConst.MONTHLY_INCOME)
    return post_tax
def year_end_tax(salary):
    """Compute after-tax annual income under progressive tax brackets.

    @salary, gross annual income.
    Returns the net income (salary minus the computed individual tax).

    Bugfix: the brackets were stored in a plain dict and iterated in order.
    This file targets Python 2 (xrange/iteritems elsewhere), where dict
    iteration order is arbitrary, so brackets could be applied out of order
    and the tax mis-computed. The rate table is now an ordered list.
    """
    # (cumulative upper bound of taxable income, marginal rate) —
    # must be traversed from lowest cap to highest.
    tax_rates = [
        (36000, 0.03),
        (144000, 0.1),
        (300000, 0.2),
        (420000, 0.25),
        (660000, 0.3),
        (960000, 0.35),
        (99999999999, 0.45),
    ]
    START = 0 * 12  # annual pre-tax deduction threshold (currently zero)
    temp = salary - START  # taxable income not yet allocated to a bracket
    idvTax = 0
    preBar = 0
    for bar, rate in tax_rates:
        if temp <= 0:
            break
        if temp - (bar - preBar) <= 0:
            # remainder fits entirely inside this bracket
            idvTax += rate * temp
            break
        else:
            # consume the whole bracket, carry the rest to the next one
            idvTax += rate * (bar - preBar)
            temp -= bar - preBar
            preBar = bar
    msg = "税前年收入: {} 年纳税额: {} 税后年收入: {}".format(salary, idvTax, salary - idvTax)
    Logger.log(msg, tag_name=LogConst.YEAR_END_INCOME)
    return salary - idvTax
def get_invest_month(self, cost):
    """Apply one month of random market fluctuation to an invested amount.

    Draws a uniform rate between the configured bottom/top bounds and
    returns the invested amount scaled by (1 + rate).
    """
    lower = VauleConfig.fluctuation_bottom
    upper = VauleConfig.fluctuation_top
    fluctuation = random.uniform(lower, upper)
    Logger.log('[fluctuation_rate]{}'.format(fluctuation),
               tag_name=LogConst.FLUCTUATION_RATE)
    return cost * (1 + fluctuation)
def svr_xml_converter(raw_data):
    '''
    This method converts the supplied xml file-object to a python dictionary.

    @raw_data, generally a file (or json string) containing the raw dataset(s),
        to be used when computing a corresponding model. If this argument is a
        file, it needs to be closed (this function closes it before returning).

    @list_observation_label, is a list containing dependent variable labels.

    Returns a dict with 'dataset', 'observation_labels', 'feature_count'
    keys, or None if any predictor value fails validation.
    '''

    # local variables
    feature_count = None
    list_dataset = []
    list_observation_label = []
    logger = Logger(__name__, 'error', 'error')

    # convert xml file to python 'dict'
    dataset = xmltodict.parse(raw_data)

    # build 'list_dataset'
    for observation in dataset['dataset']['observation']:
        for key in observation:
            if key == 'criterion':
                # dependent-variable label for this observation
                observation_label = observation['criterion']
                list_observation_label.append(observation[key])

            elif key == 'predictor':
                # NOTE(review): assumes 'criterion' is seen before 'predictor'
                # within each observation; otherwise 'observation_label' below
                # is stale or unbound — confirm against the xml schema.
                for predictor in observation[key]:
                    predictor_label = predictor['label']
                    predictor_value = predictor['value']

                    # validate predictor value; abort on first error
                    validate_value = Validate_Dataset(predictor_value)
                    validate_value.validate_value()
                    list_error_value = validate_value.get_errors()
                    if list_error_value:
                        logger.log(list_error_value)
                        return None
                    else:
                        list_dataset.append({
                            'dep_variable_label': str(observation_label),
                            'indep_variable_label': str(predictor_label),
                            'indep_variable_value': predictor_value
                        })

        # generalized feature count in an observation
        if not feature_count:
            feature_count = len(observation['predictor'])

    # save observation labels, and return
    raw_data.close()
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def process_month(self, year_index, month_index):
    """Simulate one calendar month of cash flow.

    Collects monthly salary income and housing-fund (gongjijin) contribution,
    pays the gjj-backed debt and other monthly outgo, updates the gjj
    account, optionally withdraws from it, and invests the remainder.
    """
    Logger.tail_log('第{}年'.format(year_index), tag_name=LogConst.MONTHLY_LOG_NAME)
    Logger.log('第{}月'.format(month_index), tag_name=LogConst.MONTHLY_LOG_NAME)

    income = self.income_manager.calc_income_month()
    gongjijin = self.income_manager.calc_income_ggj()
    gjj_debt = self.outcome_manager.calc_outcome_month_debt_gjj()
    other_outcome = self.outcome_manager.calc_outcome_month_debt_except_gjj(
    )

    gjj_tiqu = 0  # amount withdrawn ("提取") from the gjj account this month
    if gjj_debt > gongjijin:
        # gjj contribution cannot cover the gjj debt: the shortfall comes out
        # of regular income, reducing the investable amount
        invest = income + gongjijin - gjj_debt - other_outcome
    else:
        # gjj covers its own debt; surplus accumulates in the gjj account
        invest = income - other_outcome
    self._gjj_account += gongjijin - gjj_debt

    # every 3rd month, while not deciding to buy a house, withdraw up to the
    # cap and add it to the investable amount
    # NOTE(review): 6000 * 2 presumably means 6000/month per person for two
    # people over the quarter — confirm.
    if month_index % 3 == 0 and not Decision.decide_to_buy_house():
        gjj_tiqu_max = 6000 * 2
        gjj_tiqu = gjj_tiqu_max if self._gjj_account > gjj_tiqu_max else self._gjj_account
        self._gjj_account -= gjj_tiqu
        invest += gjj_tiqu

    msg = '[income]{} [gongjijin]{} [gjj_debt]{} [other_outcome]{} [gjj_tiqu]{} [invest]{}' \
          ''.format(income, gongjijin, gjj_debt, other_outcome, gjj_tiqu, invest)
    Logger.log(msg, tag_name=LogConst.MONTHLY_LOG_NAME)

    # advance existing investments one month, then add this month's surplus
    self.invest_manager.invest_process_month()
    self.invest_manager.throw_money(invest)
    self.print_account()
def process_year_end(self):
    """Handle the year-end bonus: log the income/outcome split and route
    the bonus into the invest manager."""
    bonus_income = self.income_manager.calc_income_year_end()
    bonus_outcome = self.outcome_manager.calc_outcome_year_end()
    investable = bonus_income - bonus_outcome
    Logger.log(
        '[income]{} [outcome]{} [invest]{}'.format(
            bonus_income, bonus_outcome, investable),
        tag_name=LogConst.EXPECTATION_LOG_NAME)
    # NOTE(review): the full income (not `investable`) is thrown into the
    # invest account — this matches the original behavior; confirm intended.
    self.invest_manager.throw_money(bonus_income)
    self.print_account()
def create_app():
    """Application factory: build and configure the Flask app.

    Reads hiera/settings.yaml, registers the blueprint, copies settings into
    app.config, and wires a rotating file handler into both the Flask and
    werkzeug loggers.

    Returns the configured Flask application.
    Raises yaml.YAMLError if the settings file cannot be parsed.

    Fixes vs. original: uses yaml.safe_load (yaml.load without an explicit
    Loader can construct arbitrary python objects and is deprecated), and
    re-raises on parse failure — previously execution fell through to the
    handler wiring below and crashed with a NameError on the unbound
    `app`/`LOG_PATH`/`HANDLER_LEVEL` variables.
    """
    # define configuration
    with open('hiera/settings.yaml', 'r') as stream:
        try:
            # local variables
            app = Flask(
                __name__,
                template_folder='interface/templates',
                static_folder='interface/static'
            )
            # settings are plain data: safe_load is sufficient and safe
            settings = yaml.safe_load(stream)

            # register blueprint
            app.register_blueprint(blueprint)

            # local logger: used for this module
            root = settings['general']['root']
            LOG_PATH = root + '/' + settings['webserver']['flask_log_path']
            HANDLER_LEVEL = settings['application']['log_level']

            # flask attributes: accessible across application
            app.config.update(
                HOST=settings['general']['host'],
                PORT_REDIS=settings['redis']['port'],
                ROOT=settings['general']['root'],
                DB_LOG_PATH=settings['database']['log_path'],
                DB_ML=settings['database']['name'],
                DB_USERNAME=settings['database']['username'],
                DB_PASSWORD=settings['database']['password'],
                LOG_LEVEL=HANDLER_LEVEL,
                FLASK_LOG_PATH=settings['webserver']['flask_log_path'],
                ERROR_LOG_PATH=settings['application']['error_log_path'],
                WARNING_LOG_PATH=settings['application']['warning_log_path'],
                INFO_LOG_PATH=settings['application']['info_log_path'],
                DEBUG_LOG_PATH=settings['application']['debug_log_path'],
                MODEL_TYPE=settings['application']['model_type']
            )
        except yaml.YAMLError as error:
            logger = Logger('error', 'yaml')
            logger.log(error)
            # without valid settings the handler wiring below cannot work;
            # propagate instead of crashing later with a NameError
            raise

    # log handler: requires the below logger
    formatter = logging.Formatter(
        "[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s")
    handler = RotatingFileHandler(LOG_PATH, maxBytes=10000000, backupCount=5)
    handler.setLevel(HANDLER_LEVEL)
    handler.setFormatter(formatter)
    app.logger.addHandler(handler)

    # logger: complements the log handler
    log = logging.getLogger('werkzeug')
    log.setLevel(logging.DEBUG)
    log.addHandler(handler)

    # return
    return app
def print_details(self):
    """Log a one-line summary of this loan: principal, total and monthly
    repayment, term, rate, and discount."""
    summary = '{}: 总借{:.2f} 总还{:.2f} 每月还{:.2f} 年数{} 利率{} 折扣{}'.format(
        self.tag_name,
        self.loan_money,
        self.final_repayment(),
        self.repay_per_month(),
        self.years,
        self.year_percent,
        self.discount)
    Logger.log(summary, tag_name=LogConst.LOAN_LOG_NAME)
def svr_xml_converter(raw_data):
    """
    This method converts the supplied xml file-object to a python dictionary.

    @raw_data, generally a file (or json string) containing the raw dataset(s),
        to be used when computing a corresponding model. If this argument is a
        file, it needs to be closed (this function closes it before returning).

    @list_observation_label, is a list containing dependent variable labels.

    Returns a dict with 'dataset', 'observation_labels', 'feature_count'
    keys, or None if any predictor value fails validation.
    """

    # local variables
    feature_count = None
    list_dataset = []
    list_observation_label = []
    logger = Logger(__name__, "error", "error")

    # convert xml file to python 'dict'
    dataset = xmltodict.parse(raw_data)

    # build 'list_dataset'
    for observation in dataset["dataset"]["observation"]:
        for key in observation:
            if key == "criterion":
                # dependent-variable label for this observation
                observation_label = observation["criterion"]
                list_observation_label.append(observation[key])

            elif key == "predictor":
                # NOTE(review): assumes 'criterion' is seen before 'predictor'
                # within each observation; otherwise 'observation_label' below
                # is stale or unbound — confirm against the xml schema.
                for predictor in observation[key]:
                    predictor_label = predictor["label"]
                    predictor_value = predictor["value"]

                    # validate predictor value; abort on first error
                    validate_value = Validate_Dataset(predictor_value)
                    validate_value.validate_value()
                    list_error_value = validate_value.get_errors()
                    if list_error_value:
                        logger.log(list_error_value)
                        return None
                    else:
                        list_dataset.append(
                            {
                                "dep_variable_label": str(observation_label),
                                "indep_variable_label": str(predictor_label),
                                "indep_variable_value": predictor_value,
                            }
                        )

        # generalized feature count in an observation
        if not feature_count:
            feature_count = len(observation["predictor"])

    # save observation labels, and return
    raw_data.close()
    return {"dataset": list_dataset,
            "observation_labels": list_observation_label,
            "feature_count": feature_count}
def process_year_end(self, working_month):
    """Handle the year-end bonus, prorated by the months worked this year,
    and route it into the invest manager."""
    bonus = self.income_manager.calc_income_year_end() * working_month / 12.0
    spend = self.outcome_manager.calc_outcome_year_end()
    investable = bonus - spend
    Logger.log(
        '[income]{} [outcome]{} [invest]{}'.format(bonus, spend, investable),
        tag_name=LogConst.YEAR_END_LOG_NAME)
    # NOTE(review): the full prorated income (not `investable`) is thrown
    # into the invest account — matches the original behavior; confirm.
    self.invest_manager.throw_money(bonus)
    self.print_account()
def invest_process_month(self):
    """Advance every investment channel by one month.

    Splits the pooled money across channels by their configured percents,
    lets each channel apply its monthly return, and stores the new total.
    """
    total = 0
    for idx, channel in self.invests.iteritems():
        weight = self.percents[idx]
        if weight == 0:
            continue
        allocated = self.money_throwed * weight
        grown = channel.get_invest_month(allocated)
        Logger.log(
            '{} invest_process_month {}'.format(self.names[idx], grown),
            tag_name=LogConst.MONTHLY_INVEST)
        total += grown
    self.money_throwed = total
def process_expectation():
    """Monte-Carlo estimate of expected final assets.

    Runs the full account simulation `iter_times` times, logging each
    outcome, and logs the average at the end.

    Fix: the accumulator was named `sum`, shadowing the builtin.
    """
    total = 0
    iter_times = 10000
    for i in xrange(iter_times):
        income_processor = Account()
        income_processor.process_all()
        one = income_processor.all_assets_value()
        msg = '{} : {}'.format(i, one)
        Logger.log(msg, tag_name=Const.MAIN_LOG_NAME)
        total += one
    avg = total / iter_times
    msg = 'avg: {}'.format(avg)
    Logger.log(msg, tag_name=Const.MAIN_LOG_NAME)
def house_loan_details(self, gjj_loan, sd_loan):
    """Log a purchase breakdown for a house financed 65% by loans.

    Given the housing-fund (gjj) and commercial (sd) loan amounts, backs
    out the full house price and logs down payment, fees, and totals.
    """
    total_loan = gjj_loan + sd_loan
    # loans cover 65% of the price, so price = loans / 0.65
    house_price = total_loan / 0.65
    detail = (
        "公积金贷款:{} 商贷:{}"
        "\n对应首付:{:.2f}(贷款65% 首付%35)"
        "\n其他金额{} (约3.5%)"
        "\n房子价格: {}"
        "\n首付交易价格: {}"
        "\n https://www.zhihu.com/question/24534079"
        "".format(gjj_loan,
                  sd_loan,
                  house_price * 0.35,
                  house_price * 0.035,
                  house_price,
                  house_price * (0.35 + 0.035))
    )
    Logger.log(detail, tag_name=LogConst.HOUSE_LOAN_DETAIL)
def process_response(self, request, response, spider):
    """Downloader-middleware hook: log the response status and url, then
    pass the response through unchanged."""
    entry = {"response_status": response.status, "url": response.url}
    Logger.log(entry, "response")
    return response
def svm_json_converter(raw_data, is_json):
    '''@svm_json_converter

    This method converts the supplied json file-object to a python dictionary.

    @raw_data, generally a file (or json string) containing the raw dataset(s),
        to be used when computing a corresponding model. If this argument is a
        file, it needs to be closed (closed here when is_json is False).

    @is_json, flag indicating 'raw_data' is a json string.

    @observation_labels, is a list containing dependent variable labels.

    Returns a dict with 'dataset', 'observation_labels', 'feature_count'
    keys, or None when a validation error is detected (see NOTE below).
    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    # already-parsed json vs. file object
    if is_json:
        dataset = raw_data
    else:
        dataset = json.load(raw_data)

    for observation_label in dataset:
        # variables
        observations = dataset[observation_label]

        # validation (part 1)
        validate_olabel = Validate_Dataset(observation_label)
        validate_olabel.validate_label()

        # dependent variable with single observation
        if type(observations) == list:
            for observation in observations:
                for feature_label, feature_value in observation.items():
                    # validation (part 2)
                    validate_flabel = Validate_Dataset(feature_label)
                    validate_flabel.validate_label()
                    validate_fvalue = Validate_Dataset(feature_value)
                    validate_fvalue.validate_value()

                    # restructured data
                    list_dataset.append({
                        'dep_variable_label': observation_label,
                        'indep_variable_label': feature_label,
                        'indep_variable_value': feature_value
                    })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(observation)

        # dependent variable with multiple observations
        elif type(observations) == dict:
            for feature_label, feature_value in observations.items():
                # validation (part 2)
                validate_flabel = Validate_Dataset(feature_label)
                validate_flabel.validate_label()
                validate_fvalue = Validate_Dataset(feature_value)
                validate_fvalue.validate_value()

                # restructured data
                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': feature_value
                })

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(observations)

        # list of observation label
        observation_labels.append(observation_label)

    # check for errors
    # NOTE(review): only the validators left over from the LAST processed
    # item are inspected here, so earlier validation errors are silently
    # ignored; 'validate_flabel'/'validate_fvalue' are also unbound when the
    # dataset is empty — confirm whether this is intended.
    olabel_error = validate_olabel.get_errors()
    flabel_error = validate_flabel.get_errors()
    fvalue_error = validate_fvalue.get_errors()
    for error in [olabel_error, flabel_error, fvalue_error]:
        if error:
            logger.log(error)
        if error and len(error) > 0:
            return None

    # close file
    if not is_json:
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def svm_json2dict(raw_data, is_json):
    '''
    This method converts the supplied json file-object to a python dictionary.

    @raw_data, generally a file (or json string) containing the raw dataset(s),
        to be used when computing a corresponding model. If this argument is a
        file, it needs to be closed (closed here when is_json is False).

    @is_json, flag indicating 'raw_data' is a json string.

    @observation_labels, is a list containing dependent variable labels.

    Returns a dict with 'dataset', 'observation_labels', 'feature_count'
    keys. Items that fail value validation are logged and skipped.
    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    # web-interface: raw_data is a file object containing json
    if not is_json:
        dataset = json.load(raw_data)
        for observation_label in dataset:
            # variables
            observations = dataset[observation_label]

            # dependent variable with single observation
            if type(observations) == dict:
                for feature_label, feature_value in observations.items():
                    # validation
                    validate_fvalue = Validator(feature_value)
                    validate_fvalue.validate_value()
                    if validate_fvalue.get_errors():
                        logger.log(validate_fvalue.get_errors())
                    else:
                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': str(observation_label),
                            'indep_variable_label': str(feature_label),
                            'indep_variable_value': feature_value
                        })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(observations)

            # dependent variable with multiple observations
            elif type(observations) == list:
                for observation in observations:
                    for feature_label, feature_value in observation.items():
                        # validation
                        validate_fvalue = Validator(feature_value)
                        validate_fvalue.validate_value()
                        if validate_fvalue.get_errors():
                            logger.log(validate_fvalue.get_errors())
                        else:
                            # restructured data
                            list_dataset.append({
                                'dep_variable_label': str(observation_label),
                                'indep_variable_label': str(feature_label),
                                'indep_variable_value': feature_value
                            })

                    # generalized feature count in an observation
                    if not feature_count:
                        feature_count = len(observation)

            # list of observation label
            observation_labels.append(observation_label)

    # programmatic-interface: raw_data is a (label, features) pair
    else:
        dataset = raw_data  # NOTE(review): unused in this branch
        observation_label = raw_data[0]

        # list of observation label
        observation_labels.append(observation_label)

        # dependent variable with single observation
        if type(raw_data[1]) == dict:
            for label, feature in raw_data[1].items():
                # validation
                validate_fvalue = Validator(feature)
                validate_fvalue.validate_value()
                if validate_fvalue.get_errors():
                    logger.log(validate_fvalue.get_errors())
                else:
                    # restructured data
                    list_dataset.append({
                        'dep_variable_label': str(observation_label),
                        'indep_variable_label': str(label),
                        'indep_variable_value': feature
                    })

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(raw_data[1])

        # dependent variable with multiple observations
        if type(raw_data[1]) == list:
            for feature_set in raw_data[1]:
                for feature_label, feature_value in feature_set.items():
                    # validation
                    validate_fvalue = Validator(feature_value)
                    validate_fvalue.validate_value()
                    if validate_fvalue.get_errors():
                        logger.log(validate_fvalue.get_errors())
                    else:
                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': str(observation_label),
                            'indep_variable_label': str(feature_label),
                            'indep_variable_value': feature_value
                        })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(feature_set)

    # close file
    if not is_json:
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def svm_csv_converter(raw_data):
    '''
    This method converts the supplied csv file-object, intended for an svm
    model, to a python dictionary.

    @raw_data, generally a file (or json string) containing the raw dataset(s),
        to be used when computing a corresponding model. If this argument is a
        file, it needs to be closed (this function closes it before returning).

    @list_observation_label, is a list containing dependent variable labels.

    Note: we use the 'Universal Newline Support' with the 'U' parameter when
        opening 'raw_data'. This allows newlines to be understood regardless,
        if the newline character was created in osx, windows, or linux.

    Note: since 'row' is a list, with one comma-delimited string element, the
        following line is required in this method:

            row = row[0].split(',')

    Returns a dict with 'dataset', 'observation_labels', 'feature_count'
    keys; None on a validation error; False on a conversion exception.
    NOTE(review): the None-vs-False distinction looks unintentional — confirm
    callers handle both.
    '''

    # local variables
    feature_count = None
    list_dataset = []
    list_observation_label = []
    list_feature_label = []
    logger = Logger(__name__, 'error', 'error')

    # open temporary 'csvfile' reader object
    dataset_reader = csv.reader(
        raw_data,
        delimiter=' ',
        quotechar='|'
    )

    # iterate first row of csvfile: header of feature labels
    for row in islice(dataset_reader, 0, 1):
        # iterate each column in a given row (skip the first column)
        row_indep_label = row[0].split(',')
        for value in islice(row_indep_label, 1, None):
            list_feature_label.append(str(value))

    # iterate all rows of csvfile
    for dep_index, row in enumerate(islice(dataset_reader, 0, None)):
        # iterate first column of each row (except first)
        row_dep_label = row[0].split(',')
        for value in row_dep_label[:1]:
            list_observation_label.append(str(value))

        # generalized feature count in an observation
        row_indep_variable = row[0].split(',')
        if not feature_count:
            feature_count = len(row_indep_variable) - 1

        # iterate each column in a given row
        for indep_index, value in enumerate(
            islice(row_indep_variable, 1, None)
        ):
            try:
                # validate, then coerce to float
                validate = Validate_Dataset(value)
                validate.validate_value()
                list_error = validate.get_errors()
                if list_error:
                    logger.log(list_error)
                    return None
                else:
                    value = float(value)
            except Exception as error:
                logger.log(error)
                return False

            list_dataset.append({
                'dep_variable_label': list_observation_label[dep_index],
                'indep_variable_label': list_feature_label[indep_index],
                'indep_variable_value': value
            })

    # close file, save observation labels, and return
    raw_data.close()
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def print_account(self):
    """Log the current balances of the cash, investing, and gongjijin
    accounts."""
    Logger.log(
        '[cash_account]{} [investing_account]{} [gongjijin_account]{}'.format(
            self.cash_account, self.invest_account, self.gjj_account),
        tag_name=LogConst.ACCOUNT_LOG_NAME)
def svm_model(kernel_type, session_id, feature_request, list_error):
    '''@svm_model

    This method generates an svm prediction using the provided prediction
    feature input(s), and the stored corresponding model, within the NoSQL
    datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.

    @encoded_labels, observation labels (dependent variable labels), encoded
        into a unique integer representation.

    Returns {'error': list_error}; errors encountered while fetching the
    dataset or feature count are appended to list_error.
    '''

    # local variables
    dataset = feature_request.get_dataset(session_id)
    feature_count = feature_request.get_count(session_id)
    label_encoder = preprocessing.LabelEncoder()
    logger = Logger(__name__, 'error', 'error')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if feature_count['error']:
        logger.log(feature_count['error'])
        list_error.append(feature_count['error'])
        feature_count = None
    else:
        feature_count = feature_count['result'][0][0]

    # check dataset integrity, build model
    # NOTE(review): if either fetch above failed, 'dataset' or
    # 'feature_count' is None and this expression raises TypeError — confirm
    # whether an early return was intended.
    if len(dataset) % feature_count == 0:
        # reorder columns to (label, value, feature-label)
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            if not (index+1) % feature_count == 0:
                # observation labels
                current_features.append(feature[1][0])

                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])
            else:
                # general feature labels in every observation
                if not len(feature_labels) == feature_count:
                    feature_labels.append(feature[2][0])

                # close out the current observation
                current_features.append(feature[1][0])
                grouped_features.append(current_features)
                observation_labels.append(feature[0][0])
                current_features = []

        # convert observation labels to a unique integer representation
        label_encoder = preprocessing.LabelEncoder()
        label_encoder.fit(dataset[:, 0])
        encoded_labels = label_encoder.transform(observation_labels)

        # create svm model
        clf = svm.SVC(kernel=kernel_type)
        clf.fit(grouped_features, encoded_labels)

        # get svm title, and cache (model, encoded labels, title)
        entity = Retrieve_Entity()
        title = entity.get_title(session_id)['result'][0][0]
        Cache_Model(clf).cache(
            'svm_rbf_model',
            str(session_id) + '_' + title
        )
        Cache_Model(label_encoder).cache('svm_rbf_labels', session_id)
        Cache_Hset().cache('svm_rbf_title', session_id, title)

        # cache svm feature labels, with respect to given session id
        Cache_Hset().cache(
            'svm_rbf_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

    # return error(s) if exists
    return {'error': list_error}
def svm_json_converter(raw_data, is_json):
    '''
    This method converts the supplied json file-object to a python dictionary.

    @raw_data, generally a file (or json string) containing the raw dataset(s),
        to be used when computing a corresponding model. If this argument is a
        file, it needs to be closed (closed here when is_json is False).

    @is_json, flag indicating 'raw_data' is a json string.

    @observation_labels, is a list containing dependent variable labels.

    Returns a dict with 'dataset', 'observation_labels', 'feature_count'
    keys. Items that fail value validation are logged and skipped.
    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    # web-interface: raw_data is a file object containing json
    if not is_json:
        dataset = json.load(raw_data)
        for observation_label in dataset:
            # variables
            observations = dataset[observation_label]

            # dependent variable with single observation
            if type(observations) == dict:
                for feature_label, feature_value in observations.items():
                    # validation
                    validate_fvalue = Validate_Dataset(feature_value)
                    validate_fvalue.validate_value()
                    if validate_fvalue.get_errors():
                        logger.log(validate_fvalue.get_errors())
                    else:
                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': str(observation_label),
                            'indep_variable_label': str(feature_label),
                            'indep_variable_value': feature_value
                        })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(observations)

            # dependent variable with multiple observations
            elif type(observations) == list:
                for observation in observations:
                    for feature_label, feature_value in observation.items():
                        # validation
                        validate_fvalue = Validate_Dataset(feature_value)
                        validate_fvalue.validate_value()
                        if validate_fvalue.get_errors():
                            logger.log(validate_fvalue.get_errors())
                        else:
                            # restructured data
                            list_dataset.append({
                                'dep_variable_label': str(observation_label),
                                'indep_variable_label': str(feature_label),
                                'indep_variable_value': feature_value
                            })

                    # generalized feature count in an observation
                    if not feature_count:
                        feature_count = len(observation)

            # list of observation label
            observation_labels.append(observation_label)

    # programmatic-interface: raw_data is a (label, features) pair
    else:
        dataset = raw_data  # NOTE(review): unused in this branch
        observation_label = raw_data[0]

        # list of observation label
        observation_labels.append(observation_label)

        # dependent variable with single observation
        if type(raw_data[1]) == dict:
            for label, feature in raw_data[1].items():
                # validation
                validate_fvalue = Validate_Dataset(feature)
                validate_fvalue.validate_value()
                if validate_fvalue.get_errors():
                    logger.log(validate_fvalue.get_errors())
                else:
                    # restructured data
                    list_dataset.append({
                        'dep_variable_label': str(observation_label),
                        'indep_variable_label': str(label),
                        'indep_variable_value': feature
                    })

            # generalized feature count in an observation
            if not feature_count:
                feature_count = len(raw_data[1])

        # dependent variable with multiple observations
        if type(raw_data[1]) == list:
            for feature_set in raw_data[1]:
                for feature_label, feature_value in feature_set.items():
                    # validation
                    validate_fvalue = Validate_Dataset(feature_value)
                    validate_fvalue.validate_value()
                    if validate_fvalue.get_errors():
                        logger.log(validate_fvalue.get_errors())
                    else:
                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': str(observation_label),
                            'indep_variable_label': str(feature_label),
                            'indep_variable_value': feature_value
                        })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(feature_set)

    # close file
    if not is_json:
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def sv_model(model, kernel_type, session_id, feature_request, list_error):
    '''
    This method generates an sv (i.e. svm, or svr) model using feature data,
    retrieved from the database. The generated model, is then stored within
    the NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.

    @encoded_labels, observation labels (dependent variable labels), encoded
        into a unique integer representation.

    Returns {'error': list_error}; errors encountered while fetching the
    dataset or feature count are appended to list_error.
    '''

    # local variables
    dataset = feature_request.get_dataset(session_id, model)
    get_feature_count = feature_request.get_count(session_id)
    label_encoder = preprocessing.LabelEncoder()
    logger = Logger(__name__, 'error', 'error')
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if get_feature_count['error']:
        logger.log(get_feature_count['error'])
        list_error.append(get_feature_count['error'])
        feature_count = None
    else:
        feature_count = get_feature_count['result'][0][0]

    # check dataset integrity, build model
    # NOTE(review): if either fetch above failed, 'dataset' or
    # 'feature_count' is None and this expression raises TypeError — confirm
    # whether an early return was intended.
    if len(dataset) % feature_count == 0:
        # reorder columns to (label, value, feature-label)
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            # svm: observation labels
            if model == list_model_type[0]:
                current_features.append(feature[1][0])

                if (index+1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(feature[0][0])
                    current_features = []

            # svr: observation labels (numeric criterion)
            elif model == list_model_type[1]:
                current_features.append(float(feature[1][0]))

                if (index+1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(float(feature[0][0]))
                    current_features = []

            # general feature labels in every observation
            if not len(feature_labels) == feature_count:
                feature_labels.append(feature[2][0])

        # case 1: svm model
        if model == list_model_type[0]:
            # convert observation labels to a unique integer representation
            label_encoder = preprocessing.LabelEncoder()
            label_encoder.fit(dataset[:, 0])
            encoded_labels = label_encoder.transform(observation_labels)

            # create model
            clf = svm.SVC(kernel=kernel_type, probability=True)

            # cache encoded labels
            Cache_Model(label_encoder).cache(model + '_labels', session_id)

            # fit model
            clf.fit(grouped_features, encoded_labels)

        # case 2: svr model
        elif model == list_model_type[1]:
            # create model
            clf = svm.SVR(kernel=kernel_type)

            # fit model
            clf.fit(grouped_features, observation_labels)

            # compute, and cache coefficient of determination
            r2 = clf.score(grouped_features, observation_labels)
            Cache_Hset().cache(
                model + '_r2',
                session_id,
                r2
            )

        # get title
        entity = Retrieve_Entity()
        title = entity.get_title(session_id)['result'][0][0]

        # cache model, title
        Cache_Model(clf).cache(
            model + '_model',
            str(session_id) + '_' + title
        )
        Cache_Hset().cache(model + '_title', session_id, title)

        # cache feature labels, with respect to given session id
        Cache_Hset().cache(
            model + '_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

    # return error(s) if exists
    return {'error': list_error}
def print_account(self):
    """Log the current cash balance alongside the money currently thrown
    into investments."""
    cash = self.current
    invested = self.invest_manager.check_curr_throwed_money()
    summary = '[cash]{} [investing_account]{}'.format(cash, invested)
    Logger.log(summary, tag_name=LogConst.EXPECTATION_LOG_NAME)
def generate(model, kernel_type, session_id, feature_request, list_error):
    '''
    This method generates an sv (i.e. svm, or svr) model using feature data,
    retrieved from the database. The generated model, is then stored within
    the NoSQL datastore.

    @grouped_features, a matrix of observations, where each nested vector,
        or python list, is a collection of features within the containing
        observation.

    @encoded_labels, observation labels (dependent variable labels), encoded
        into a unique integer representation.

    Returns {'error': list_error}; errors encountered while fetching the
    dataset or feature count are appended to list_error.
    '''

    # local variables
    dataset = feature_request.get_dataset(session_id, model)
    get_feature_count = feature_request.get_count(session_id)
    label_encoder = preprocessing.LabelEncoder()
    logger = Logger(__name__, 'error', 'error')
    list_model_type = current_app.config.get('MODEL_TYPE')

    # get dataset
    if dataset['error']:
        logger.log(dataset['error'])
        list_error.append(dataset['error'])
        dataset = None
    else:
        dataset = numpy.asarray(dataset['result'])

    # get feature count
    if get_feature_count['error']:
        logger.log(get_feature_count['error'])
        list_error.append(get_feature_count['error'])
        feature_count = None
    else:
        feature_count = get_feature_count['result'][0][0]

    # check dataset integrity, build model
    # NOTE(review): if either fetch above failed, 'dataset' or
    # 'feature_count' is None and this expression raises TypeError — confirm
    # whether an early return was intended.
    if len(dataset) % feature_count == 0:
        # reorder columns to (label, value, feature-label)
        features_list = dataset[:, [[0], [2], [1]]]
        current_features = []
        grouped_features = []
        observation_labels = []
        feature_labels = []

        # group features into observation instances, record labels
        for index, feature in enumerate(features_list):
            # svm: observation labels
            if model == list_model_type[0]:
                current_features.append(feature[1][0])

                if (index+1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(feature[0][0])
                    current_features = []

            # svr: observation labels (numeric criterion)
            elif model == list_model_type[1]:
                current_features.append(float(feature[1][0]))

                if (index+1) % feature_count == 0:
                    grouped_features.append(current_features)
                    observation_labels.append(float(feature[0][0]))
                    current_features = []

            # general feature labels in every observation
            if not len(feature_labels) == feature_count:
                feature_labels.append(feature[2][0])

        # case 1: svm model
        if model == list_model_type[0]:
            # convert observation labels to a unique integer representation
            label_encoder = preprocessing.LabelEncoder()
            label_encoder.fit(dataset[:, 0])
            encoded_labels = label_encoder.transform(observation_labels)

            # create model
            clf = svm.SVC(kernel=kernel_type, probability=True)

            # cache encoded labels
            Model(label_encoder).cache(model + '_labels', session_id)

            # fit model
            clf.fit(grouped_features, encoded_labels)

        # case 2: svr model
        elif model == list_model_type[1]:
            # create model
            clf = svm.SVR(kernel=kernel_type)

            # fit model
            clf.fit(grouped_features, observation_labels)

            # compute, and cache coefficient of determination
            r2 = clf.score(grouped_features, observation_labels)
            Hset().cache(
                model + '_r2',
                session_id,
                r2
            )

        # get title
        entity = Entity()
        title = entity.get_title(session_id)['result'][0][0]

        # cache model, title
        Model(clf).cache(
            model + '_model',
            str(session_id) + '_' + title
        )
        Hset().cache(model + '_title', session_id, title)

        # cache feature labels, with respect to given session id
        Hset().cache(
            model + '_feature_labels',
            str(session_id),
            json.dumps(feature_labels)
        )

    # return error(s) if exists
    return {'error': list_error}
def svm_xml_converter(raw_data):
    '''@svm_xml_converter

    This method converts the supplied xml file-object to a python dictionary.

    @raw_data, generally a file (or json string) containing the raw dataset(s),
        to be used when computing a corresponding model. If this argument is a
        file, it needs to be closed (this function closes it before returning).

    @list_observation_label, is a list containing dependent variable labels.

    Returns a dict with 'dataset', 'observation_labels', 'feature_count'
    keys, or None if any label/value fails validation.

    Bugfix: feature-level error checks previously read errors from the
    observation-label validator ('validate') instead of the feature
    validators, so feature validation errors were silently ignored.
    '''

    # local variables
    feature_count = None
    list_dataset = []
    list_observation_label = []
    logger = Logger(__name__, 'error', 'error')

    # convert xml file to python 'dict'
    dataset = xmltodict.parse(raw_data)

    # build 'list_dataset'
    for observation in dataset['dataset']['observation']:
        observation_label = observation['dependent-variable']

        # validate the observation (dependent-variable) label
        validate = Validate_Dataset(observation_label)
        validate.validate_label()
        list_error = validate.get_errors()
        if list_error:
            logger.log(list_error)
            return None
        else:
            list_observation_label.append(observation_label)

        for feature in observation['independent-variable']:
            feature_label = feature['label']
            feature_value = feature['value']

            # validate each feature label and value
            validate_label = Validate_Dataset(feature_label)
            validate_value = Validate_Dataset(feature_value)
            validate_label.validate_label()
            validate_value.validate_value()
            # read errors from the feature validators (was: `validate`)
            list_error_label = validate_label.get_errors()
            list_error_value = validate_value.get_errors()
            if list_error_label or list_error_value:
                logger.log(list_error_label)
                logger.log(list_error_value)
                return None
            else:
                list_dataset.append({
                    'dep_variable_label': observation_label,
                    'indep_variable_label': feature_label,
                    'indep_variable_value': feature_value
                })

        # generalized feature count in an observation
        if not feature_count:
            feature_count = len(observation['independent-variable'])

    # save observation labels, and return
    raw_data.close()
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
def svr_json_converter(raw_data, is_json):
    '''@svr_json_converter

    This method converts the supplied json file-object to a python
    dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If this
        argument is a file, it needs to be closed.

    @is_json, flag indicating 'raw_data' is a json string.

    @observation_labels, is a list containing dependent variable labels.

    NOTE(review): a second 'svr_json_converter' is defined later in this
    module and shadows this definition at import time — confirm which copy
    is intended to survive.
    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    # web-interface: 'raw_data' is a file-object, decode it here
    if not is_json:
        dataset = json.load(raw_data)
        for criterion, predictors in dataset.items():
            observation_label = criterion

            # list of observation label
            observation_labels.append(criterion)

            # criterion with single observation
            if type(predictors) == dict:
                for label, predictor in predictors.items():
                    # validation (part 1)
                    # NOTE(review): only here is the predictor str-wrapped
                    # before validation — confirm the asymmetry with the
                    # list branch below is intentional.
                    validate_predictor = Validate_Dataset(str(predictor))
                    validate_predictor.validate_value()
                    if validate_predictor.get_errors():
                        logger.log(validate_predictor.get_errors())
                    else:
                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': observation_label,
                            'indep_variable_label': str(label),
                            'indep_variable_value': predictor
                        })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(predictors)

            # criterion with multiple observation
            if type(predictors) == list:
                # NOTE(review): the loop variable shadows the outer
                # 'criterion'; safe only because the outer loop rebinds it
                # on the next iteration.
                for criterion in predictors:
                    for label, predictor in criterion.items():
                        # validation (part 1)
                        validate_predictor = Validate_Dataset(predictor)
                        validate_predictor.validate_value()
                        if validate_predictor.get_errors():
                            logger.log(validate_predictor.get_errors())
                        else:
                            # restructured data
                            list_dataset.append({
                                'dep_variable_label': str(observation_label),
                                'indep_variable_label': str(label),
                                'indep_variable_value': predictor
                            })

                    # generalized feature count in an observation
                    if not feature_count:
                        feature_count = len(criterion.items())

    # programmatic-interface: 'raw_data' is already a dict-like object
    else:
        dataset = raw_data
        for criterion, predictors in dataset.items():
            # list of observation label
            observation_labels.append(criterion)

            # criterion with single observation
            if type(predictors) == dict:
                for label, predictor in predictors.items():
                    # validation (part 1)
                    validate_predictor = Validate_Dataset(predictor)
                    validate_predictor.validate_value()
                    if validate_predictor.get_errors():
                        logger.log(validate_predictor.get_errors())
                    else:
                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': str(criterion),
                            'indep_variable_label': str(label),
                            'indep_variable_value': predictor
                        })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(predictors.items())

            # criterion with multiple observation
            if type(predictors) == list:
                for single_predictors in predictors:
                    for label, predictor in single_predictors.items():
                        # validation (part 1)
                        validate_predictor = Validate_Dataset(predictor)
                        validate_predictor.validate_value()
                        if validate_predictor.get_errors():
                            logger.log(validate_predictor.get_errors())
                        else:
                            # restructured data
                            list_dataset.append({
                                'dep_variable_label': str(criterion),
                                'indep_variable_label': str(label),
                                'indep_variable_value': predictor
                            })

                    # generalized feature count in an observation
                    if not feature_count:
                        feature_count = len(single_predictors.items())

    # close file
    if not is_json:
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
def svr_json_converter(raw_data, is_json):
    '''
    This method converts the supplied json file-object to a python
    dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If this
        argument is a file, it needs to be closed.

    @is_json, flag indicating 'raw_data' is a json string.

    @observation_labels, is a list containing dependent variable labels.

    NOTE(review): this redefines 'svr_json_converter' and shadows the
    earlier copy in this module; the two differ only in str-wrapping
    details — confirm which copy is intended to survive.
    '''

    # local variables
    feature_count = None
    list_dataset = []
    observation_labels = []
    logger = Logger(__name__, 'error', 'error')

    # web-interface: 'raw_data' is a file-object, decode it here
    if not is_json:
        dataset = json.load(raw_data)
        for criterion, predictors in dataset.items():
            observation_label = criterion

            # list of observation label
            observation_labels.append(criterion)

            # criterion with single observation
            if type(predictors) == dict:
                for label, predictor in predictors.items():
                    # validation (part 1)
                    validate_predictor = Validate_Dataset(predictor)
                    validate_predictor.validate_value()
                    if validate_predictor.get_errors():
                        logger.log(validate_predictor.get_errors())
                    else:
                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': str(observation_label),
                            'indep_variable_label': str(label),
                            'indep_variable_value': predictor
                        })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(predictors)

            # criterion with multiple observation
            if type(predictors) == list:
                # NOTE(review): the loop variable shadows the outer
                # 'criterion'; safe only because the outer loop rebinds it
                # on the next iteration.
                for criterion in predictors:
                    for label, predictor in criterion.items():
                        # validation (part 1)
                        validate_predictor = Validate_Dataset(predictor)
                        validate_predictor.validate_value()
                        if validate_predictor.get_errors():
                            logger.log(validate_predictor.get_errors())
                        else:
                            # restructured data
                            list_dataset.append({
                                'dep_variable_label': str(observation_label),
                                'indep_variable_label': str(label),
                                'indep_variable_value': predictor
                            })

                    # generalized feature count in an observation
                    if not feature_count:
                        feature_count = len(criterion.items())

    # programmatic-interface: 'raw_data' is already a dict-like object
    else:
        dataset = raw_data
        for criterion, predictors in dataset.items():
            # list of observation label
            observation_labels.append(criterion)

            # criterion with single observation
            if type(predictors) == dict:
                for label, predictor in predictors.items():
                    # validation (part 1)
                    validate_predictor = Validate_Dataset(predictor)
                    validate_predictor.validate_value()
                    if validate_predictor.get_errors():
                        logger.log(validate_predictor.get_errors())
                    else:
                        # restructured data
                        list_dataset.append({
                            'dep_variable_label': str(criterion),
                            'indep_variable_label': str(label),
                            'indep_variable_value': predictor
                        })

                # generalized feature count in an observation
                if not feature_count:
                    feature_count = len(predictors.items())

            # criterion with multiple observation
            if type(predictors) == list:
                for single_predictors in predictors:
                    for label, predictor in single_predictors.items():
                        # validation (part 1)
                        validate_predictor = Validate_Dataset(predictor)
                        validate_predictor.validate_value()
                        if validate_predictor.get_errors():
                            logger.log(validate_predictor.get_errors())
                        else:
                            # restructured data
                            list_dataset.append({
                                'dep_variable_label': str(criterion),
                                'indep_variable_label': str(label),
                                'indep_variable_value': predictor
                            })

                    # generalized feature count in an observation
                    if not feature_count:
                        feature_count = len(single_predictors.items())

    # close file
    if not is_json:
        raw_data.close()

    # save observation labels, and return
    return {
        'dataset': list_dataset,
        'observation_labels': observation_labels,
        'feature_count': feature_count
    }
PORT_REDIS=settings['redis']['port'], ROOT=settings['general']['root'], DB_LOG_PATH=settings['database']['log_path'], DB_ML=settings['database']['name'], DB_USERNAME=settings['database']['username'], DB_PASSWORD=settings['database']['password'], LOG_LEVEL=HANDLER_LEVEL, FLASK_LOG_PATH=settings['webserver']['flask_log_path'], ERROR_LOG_PATH=settings['application']['error_log_path'], WARNING_LOG_PATH=settings['application']['warning_log_path'], INFO_LOG_PATH=settings['application']['info_log_path'], DEBUG_LOG_PATH=settings['application']['debug_log_path'], ) except yaml.YAMLError as error: logger = Logger('error', 'yaml') logger.log(error) # log handler: requires the below logger formatter = logging.Formatter( "[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s") handler = RotatingFileHandler(LOG_PATH, maxBytes=10000000, backupCount=5) handler.setLevel(HANDLER_LEVEL) handler.setFormatter(formatter) app.logger.addHandler(handler) # logger: complements the log handler log = logging.getLogger('werkzeug') log.setLevel(logging.DEBUG) log.addHandler(handler) # run application
def create_app(args=None):
    '''@create_app

    Application factory: load the hiera yaml settings, construct and
    configure a Flask app (config values, blueprint, rotating log
    handlers), and return it.

    @args, optional dict with 'prefix' (hiera subdirectory) and 'settings'
        (extra Flask constructor argument) keys; defaults to both empty.

    Raises: re-raises any exception after logging it.
    '''
    # BUGFIX: a mutable dict default is shared across calls; build the
    # default per-call instead (backward-compatible for all callers).
    if args is None:
        args = {'prefix': '', 'settings': ''}

    # path to hiera
    if args['prefix']:
        path = 'hiera/' + args['prefix'] + '/hiera/settings.yaml'
    else:
        path = 'hiera/settings.yaml'

    try:
        # define configuration
        with open(path, 'r') as stream:
            # local variables
            if args['settings']:
                # NOTE(review): the second positional Flask argument is
                # 'static_url_path' — confirm passing args['settings']
                # here is intentional.
                app = Flask(
                    __name__,
                    args['settings'],
                    template_folder='interface/templates',
                    static_folder='interface/static',
                )
            else:
                app = Flask(
                    __name__,
                    template_folder='interface/templates',
                    static_folder='interface/static',
                )

            # BUGFIX: yaml.load without an explicit Loader can construct
            # arbitrary python objects from the settings file; a config
            # file only needs safe_load.
            settings = yaml.safe_load(stream)

            # register blueprint
            app.register_blueprint(blueprint)

            # local logger: used for this module
            root = settings['general']['root']
            LOG_PATH = root + settings['webserver']['flask_log_path']
            HANDLER_LEVEL = settings['application']['log_level']

            # flask attributes: accessible across application
            app.config.update(
                HOST=settings['general']['host'],
                REDIS_HOST=settings['redis']['host'],
                REDIS_PORT=settings['redis']['port'],
                ROOT=settings['general']['root'],
                DB_HOST=settings['database']['host'],
                DB_LOG_PATH=settings['database']['log_path'],
                DB_ML=settings['database']['name'],
                DB_USERNAME=settings['database']['username'],
                DB_PASSWORD=settings['database']['password'],
                LOG_LEVEL=HANDLER_LEVEL,
                FLASK_LOG_PATH=settings['webserver']['flask_log_path'],
                ERROR_LOG_PATH=settings['application']['error_log_path'],
                WARNING_LOG_PATH=settings['application']['warning_log_path'],
                INFO_LOG_PATH=settings['application']['info_log_path'],
                DEBUG_LOG_PATH=settings['application']['debug_log_path'],
                MODEL_TYPE=settings['application']['model_type'],
                SALT_LENGTH=settings['crypto']['salt_length'],
                USER_ID=0,
            )

            # log handler: requires the below logger
            formatter = logging.Formatter(
                "[%(asctime)s] {%(pathname)s:%(lineno)d} "
                "%(levelname)s - %(message)s")
            handler = RotatingFileHandler(
                LOG_PATH,
                maxBytes=10000000,
                backupCount=5)
            handler.setLevel(HANDLER_LEVEL)
            handler.setFormatter(formatter)
            app.logger.addHandler(handler)

            # logger: complements the log handler
            log = logging.getLogger('werkzeug')
            log.setLevel(logging.DEBUG)
            log.addHandler(handler)

            # return
            return app

    except Exception as error:
        logger = Logger('error', 'yaml')
        logger.log(error)
        raise
def svm_csv2dict(raw_data):
    '''
    This method converts the supplied csv file-object, intended for an svm
    model, to a python dictionary.

    @raw_data, generally a file (or json string) containing the raw
        dataset(s), to be used when computing a corresponding model. If this
        argument is a file, it needs to be closed.

    @list_observation_label, is a list containing dependent variable labels.

    Note: we use the 'Universal Newline Support' with the 'U' parameter
          when opening 'raw_data'. This allows newlines to be understood
          regardless, if the newline character was created in osx, windows,
          or linux.

    Note: since 'row' is a list, with one comma-delimited string element,
          the following line is required in this method:

              row = row[0].split(',')

    Returns a dict with 'dataset', 'observation_labels', 'feature_count'
    keys; None on a validation error; False on any other exception.
    '''
    feature_count = None
    list_dataset = []
    list_observation_label = []
    list_feature_label = []
    logger = Logger(__name__, 'error', 'error')

    # open temporary 'csvfile' reader object
    # NOTE: delimiter is ' ', so each csv row arrives as a single
    # comma-joined string element that is split manually below.
    dataset_reader = csv.reader(
        raw_data,
        delimiter=' ',
        quotechar='|'
    )

    # iterate first row of csvfile: collect feature (column) labels,
    # skipping the first column (the dependent-variable column).
    # NOTE: this islice consumes the header row from 'dataset_reader',
    # so the loop below starts at the first data row.
    for row in islice(dataset_reader, 0, 1):
        # iterate each column in a given row
        row_indep_label = row[0].split(',')
        for value in islice(row_indep_label, 1, None):
            list_feature_label.append(str(value))

    # iterate all rows of csvfile
    for dep_index, row in enumerate(islice(dataset_reader, 0, None)):
        # iterate first column of each row (except first)
        row_dep_label = row[0].split(',')
        for value in row_dep_label[:1]:
            list_observation_label.append(str(value))

        # generalized feature count in an observation (header column is
        # excluded via the '- 1')
        row_indep_variable = row[0].split(',')
        if not feature_count:
            feature_count = len(row_indep_variable) - 1

        # iterate each column in a given row
        for indep_index, value in enumerate(
            islice(row_indep_variable, 1, None)
        ):
            try:
                # NOTE(review): other converters in this module use
                # 'Validate_Dataset'; confirm 'Validator' is the intended
                # class here.
                validate = Validator(value)
                validate.validate_value()
                list_error = validate.get_errors()
                if list_error:
                    logger.log(list_error)
                    return None
                else:
                    value = float(value)
            except Exception as error:
                # NOTE(review): returns False here but None on validation
                # errors above — confirm callers distinguish the two.
                logger.log(error)
                return False

            list_dataset.append({
                'dep_variable_label': list_observation_label[dep_index],
                'indep_variable_label': list_feature_label[indep_index],
                'indep_variable_value': value
            })

    # close file, save observation labels, and return
    raw_data.close()
    return {
        'dataset': list_dataset,
        'observation_labels': list_observation_label,
        'feature_count': feature_count
    }
class SQL(object):
    '''@SQL

    This class provides an interface to connect, execute commands, and
    disconnect from a SQL database. It explicitly inherits pythons
    'new-style' class.

    Note: this class is invoked within 'save_xx.py', and 'retrieve_xx.py'
          modules.

    Note: this class explicitly inherits the 'new-style' class.
    '''

    def __init__(self, host=None, user=None, passwd=None):
        '''@__init__

        This constructor is responsible for defining class variables.

        @host, @user, @passwd, optional overrides; when omitted, the
            corresponding value is read from the 'Database' settings.
        '''
        self.db_settings = Database()
        self.list_error = []
        self.proceed = True

        # BUGFIX: initialize connection state so the error path in
        # 'sql_connect' cannot raise AttributeError when DB.connect()
        # fails before a cursor was ever created.
        self.conn = None
        self.cursor = None

        # database logger
        self.logger = Logger(__name__, 'database', 'database', 'debug')

        # host address
        if host:
            self.host = host
        else:
            self.host = self.db_settings.get_db_host()

        # sql username for above host address
        if user:
            self.user = user
        else:
            self.user = self.db_settings.get_db_username()

        # sql password for above username
        if passwd:
            self.passwd = passwd
        else:
            self.passwd = self.db_settings.get_db_password()

    def sql_connect(self, database=None):
        '''@sql_connect

        This method is responsible for defining the necessary interface to
        connect to a SQL database.

        @database, optional database name to select on connect.

        Returns a dict with 'status' (bool), 'error' (None or accumulated
        error list), and 'id' (always None here) keys.
        '''
        try:
            if database is None:
                self.conn = DB.connect(
                    self.host,
                    self.user,
                    self.passwd,
                )
            else:
                self.conn = DB.connect(
                    self.host,
                    self.user,
                    self.passwd,
                    db=database,
                )
            self.cursor = self.conn.cursor()

            # log successful connection
            if self.cursor:
                self.logger.log('database connected: success')

            return {
                'status': True,
                'error': None,
                'id': None,
            }

        # BUGFIX: 'except DB.Error, error' is python-2-only syntax; 'as'
        # is valid on python 2.6+ and python 3.
        except DB.Error as error:
            self.proceed = False
            self.list_error.append(error)

            # BUGFIX: previously guarded on 'self.cursor', which did not
            # exist when DB.connect() itself failed, raising
            # AttributeError and masking the real error; log the failure
            # unconditionally.
            self.logger.log('database connected: fail')

            return {
                'status': False,
                'error': self.list_error,
                'id': None,
            }