import pandas as pd

# `const` and `helper` are project-local modules (scenario constants and CSV helpers)
import const
import helper


def main():
    """
    Main function

    :return:
    """
    # Create output dictionary
    # Headers: | scenario | one student | Betreuungsberufe | Detailhandelsberufe |
    assignment = []
    record = {
        'scenario': const.chosen_scenario,
        'one student': const.at_least_one_student,
        'Betreuungsberufe': const.scenarios['scenarios'][const.chosen_scenario]
        ['berufsgattung']['Betreuungsberufe']['number'],
        'Detailhandelsberufe': const.scenarios['scenarios'][const.chosen_scenario]
        ['berufsgattung']['Detailhandelsberufe']['number']
    }
    assignment.append(record)

    # Cast the records to a dataframe, whether the result is infeasible or optimal
    # Sorted on Lehrgang first, Gemeinde second
    print('')
    print('Writing CSV file')
    assignment_df = pd.DataFrame.from_records(assignment)
    assignment_df.set_index(['scenario'], inplace=True)
    output_name = 'output_{}_{}'.format('scenario', const.chosen_scenario)
    helper.write_to_csv(assignment_df, output_name)
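# `helper.write_to_csv` above is project-local. A minimal sketch of a
# DataFrame-to-CSV wrapper consistent with that call; the function body,
# the '.csv' suffix handling, and the output directory are assumptions.
import os

import pandas as pd


def write_to_csv(df: pd.DataFrame, output_name: str, output_dir: str = '.') -> None:
    """Write a dataframe to <output_dir>/<output_name>.csv (hypothetical helper)."""
    path = os.path.join(output_dir, output_name + '.csv')
    # Keep the index: main() sets it to 'scenario' before writing
    df.to_csv(path)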
def test():
    # logging.basicConfig(level=logging.INFO)
    query = HKEX_API(from_date=n_yearsago(n=1), to_date=today())
    for data in query.data:
        try:
            url = data.file_link
            # url = 'https://www.dropbox.com/'
            print(url)
            pdf = PDF(url)
            pdf_obj = pdf.pdf_obj
            f = AuditFee(pdf_obj)
            tab_sum = []
            for table in f.tables:
                tab_sum.append(table.summary)
        except KeyboardInterrupt:
            break
        except Exception as e:
            # print(e)
            result = {
                'table_summary': e,
                'ERROR': True,
                'url': url,
            }
            write_to_csv(result, 'result_3.csv')
            continue
        else:
            # print('ok')
            result = {
                'table_summary': list(filter(None, tab_sum)),
                'ERROR': None,
                'url': url
            }
            write_to_csv(result, 'result_3.csv')
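# In test() the helper takes a result dict plus a file name, which suggests one
# appended row per filing. A minimal sketch under that assumption; the
# header-on-first-write behaviour is a guess, not the project's actual code.
import csv
import os


def write_to_csv(row: dict, csv_name: str) -> None:
    """Append one result dict as a CSV row; write a header only for a new file."""
    is_new_file = not os.path.isfile(csv_name)
    with open(csv_name, 'a', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(row.keys()))
        if is_new_file:
            writer.writeheader()
        writer.writerow(row)  # non-string values (e.g. an exception) are str()-ed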
def create_output(self):
    dict_of_variables = {
        'production_variables': self.production_variables,
        'inventory_variables': self.inventory_variables
    }
    output_df = write_outputs(dict_of_variables, attr='x')
    write_to_csv(output_df)
def create_output(self):
    dict_of_variables = {
        'production_variables': self.production_variables,
        'inventory_variables': self.inventory_variables
    }
    output_df = write_outputs_xpress(dict_of_variables, self.model)
    write_to_csv(output_df)
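# The two create_output variants differ only in how solution values are read:
# attr='x' pulls a solver attribute straight off each variable, while the
# Xpress version needs the model to look values up. A sketch of what
# write_outputs_xpress might do; the helper and its long output format are
# assumptions, though problem.getSolution() is the documented Xpress call.
import pandas as pd


def write_outputs_xpress(dict_of_variables: dict, model) -> pd.DataFrame:
    """Hypothetical: collect Xpress solution values into one long dataframe."""
    rows = []
    for group_name, variables in dict_of_variables.items():
        for key, var in variables.items():
            rows.append({'variable_group': group_name,
                         'key': key,
                         'value': model.getSolution(var)})
    return pd.DataFrame(rows)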
# ================== Optimization ==================
if model_params['write_lp']:
    logger.info('Writing the lp file!')
    model.export_as_lp('./{}.lp'.format(model.name))

logger.info('Optimization starts!')

# If CPLEX is installed locally, we can use that to solve the problem.
# Otherwise, we can use DOcplexcloud. For a docloud solve, we need a valid 'url' and 'key'.
# Note that if the 'url' and 'key' parameters are present,
# the solve will be started on DOcplexcloud even if CPLEX is available.
# I've provided more info on this in optimization_model_docplex.py.
# For now, a simple way to handle a local or docloud solve:
if model_params['cplex_cloud']:
    model.solve(url=model_params['url'], key=model_params['api_key'])
else:
    model.solve()

if model.solve_details.status == 'optimal':
    logger.info('The solution is optimal and the objective value '
                'is ${:,.2f}!'.format(model.objective_value))

# ================== Output ==================
dict_of_variables = {'production_variables': production_variables,
                     'inventory_variables': inventory_variables}
output_df = process_data.write_outputs(dict_of_variables, attr='solution_value')
helper.write_to_csv(output_df)
logger.info('Outputs are written to csv!')
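# A caveat on the status check above: docplex's Model.solve() returns a
# SolveSolution (or None on failure), so the outcome can be tested directly
# instead of string-matching solve_details.status, which for a MIP typically
# reads 'integer optimal solution' rather than 'optimal'. A hedged variant:
solution = model.solve()
if solution is None:
    logger.warning('No solution found; solver status: %s', model.solve_details.status)
else:
    logger.info('Objective value: ${:,.2f}'.format(model.objective_value))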
def fit_model_by_condition(subj_idx=0, n=1, n_training_conditions=9, test_conditions='all'):
    '''
    NB: This script can (and should) be run in parallel in several different python consoles,
    one subject per console.

    subj_idx: 0 to 6 to obtain individual fits, or 'all' to fit to group-averaged data
    n: number of repeated fits per condition (n>1 can be used to quickly check robustness
        of model fitting)
    n_training_conditions: defines how many conditions will be included in the training set
        (4, 8, or 9). For 4, training data for each condition (TTA, d) will be the decisions
        where both TTA and d are different from those of the current condition.
        For 8, all other conditions will be included.
    test_conditions: 'all' to cross-validate the model on all nine conditions, or a list of
        dicts with the conditions for which to fit the model
    '''
    modelTtaBounds = model_definitions.ModelTtaBounds(ndt='gaussian')
    exp_data = pd.read_csv('../data/measures.csv',
                           usecols=['subj_id', 'RT', 'is_turn_decision',
                                    'tta_condition', 'd_condition'])
    subjects = exp_data.subj_id.unique()

    if test_conditions == 'all':
        conditions = [{'tta': tta, 'd': d}
                      for tta in exp_data.tta_condition.unique()
                      for d in exp_data.d_condition.unique()]
    else:
        conditions = test_conditions

    if subj_idx == 'all':
        subj_id = 'all'
        subj_data = exp_data
        loss = model_definitions.LossWLSVincent
    else:
        subj_id = subjects[subj_idx]
        subj_data = exp_data[(exp_data.subj_id == subj_id)]
        loss = model_definitions.LossWLS

    directory = ('../model_fit_results/%s/'
                 % ('full_data' if n_training_conditions == 9
                    else 'cross_validation_%i' % n_training_conditions))
    file_name = 'subj_%s.csv' % str(subj_id)

    # Write the header row; the 'n' column is only present for repeated fits
    if n > 1:
        helper.write_to_csv(directory, file_name,
                            ['subj_id', 'tta', 'd', 'n', 'loss'] + modelTtaBounds.param_names)
    else:
        helper.write_to_csv(directory, file_name,
                            ['subj_id', 'tta', 'd', 'loss'] + modelTtaBounds.param_names)

    for i in range(n):
        print('Training conditions: %i' % n_training_conditions)
        print(subj_id)
        if n_training_conditions == 9:
            training_data = subj_data
            print('len(training_data): ' + str(len(training_data)))
            fitted_model = helper.fit_model(modelTtaBounds.model, training_data, loss)
            if n > 1:
                helper.write_to_csv(directory, file_name,
                                    [subj_id, 'NA', 'NA', i,
                                     fitted_model.get_fit_result().value()]
                                    + fitted_model.get_model_parameters())
            else:
                helper.write_to_csv(directory, file_name,
                                    [subj_id, 'NA', 'NA',
                                     fitted_model.get_fit_result().value()]
                                    + fitted_model.get_model_parameters())
        else:
            for condition in conditions:
                print(condition)
                if n_training_conditions == 8:
                    training_data = subj_data[(~(subj_data.tta_condition == condition['tta'])
                                               | ~(subj_data.d_condition == condition['d']))]
                elif n_training_conditions == 4:
                    training_data = subj_data[(~(subj_data.tta_condition == condition['tta'])
                                               & ~(subj_data.d_condition == condition['d']))]
                else:
                    raise ValueError('n_training_conditions should be one of [9, 8, 4]')
                print('len(training_data): ' + str(len(training_data)))
                fitted_model = helper.fit_model(modelTtaBounds.model, training_data, loss)
                if n > 1:
                    helper.write_to_csv(directory, file_name,
                                        [subj_id, condition['tta'], condition['d'], i,
                                         fitted_model.get_fit_result().value()]
                                        + fitted_model.get_model_parameters())
                else:
                    helper.write_to_csv(directory, file_name,
                                        [subj_id, condition['tta'], condition['d'],
                                         fitted_model.get_fit_result().value()]
                                        + fitted_model.get_model_parameters())
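# This script's helper.write_to_csv(directory, file_name, row) takes a list
# rather than a dataframe, so it is presumably a thin append-a-row wrapper.
# A minimal sketch under that assumption (directory creation included, since
# the results folders may not exist yet):
import csv
import os


def write_to_csv(directory, file_name, row):
    """Hypothetical: append one list as a CSV row, creating the directory if needed."""
    os.makedirs(directory, exist_ok=True)
    with open(os.path.join(directory, file_name), 'a', newline='') as f:
        csv.writer(f).writerow(row)

# Note that the first call in fit_model_by_condition writes the header row, so
# rerunning a fit would append a second header to the same file.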
    # consecutive_pages = pages
    consecutive_pages = [tuple(li) for li in consecutive_int_list(unique(pages))]
    # consecutive_pages = sorted(flatten([li for li in consecutive_int_list(list(set(pages))) if len(li) > 1]))
    # consecutive_pages = [tuple(li) for li in consecutive_int_list(list(set(pages))) if len(li) > 1]
    return consecutive_pages


if __name__ == "__main__":
    import get_pdf
    from test_cases import test_cases
    from hkex import get_data
    from helper import write_to_csv

    # logging.basicConfig(level=logging.INFO)
    for data in get_data():
        result = {}
        url = data.file_link
        csv = 'indpt_audit_report_2.csv'
        pdf_obj = get_pdf.byte_obj_from_url(url)
        try:
            py_pdf = get_pdf.by_pypdf(pdf_obj)
        except Exception:
            continue
        toc = _get_toc(py_pdf)
        # print(toc)
        pattern = r'^(?!.*internal)(?=.*report|responsibilities).*auditor.*$'
        pages = (_get_page_by_outline(toc, pattern)
                 or _get_page_by_page_title_search(get_pdf.by_pdfplumber(pdf_obj), pattern))
        result['result'] = pages
        result['toc'] = 'available' if toc else 'unavailable'
        result['url'] = url
        write_to_csv(result, csv)
        pdf_obj.close()
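# consecutive_int_list is assumed to group page numbers into runs of
# consecutive integers, e.g. [1, 2, 3, 7, 9] -> [[1, 2, 3], [7], [9]].
# A minimal sketch using the groupby-on-offset idiom:
from itertools import groupby


def consecutive_int_list(numbers):
    """Group sorted integers into runs of consecutive values (assumed behaviour)."""
    runs = []
    # value - index is constant within each run of consecutive integers
    for _, group in groupby(enumerate(sorted(numbers)), key=lambda p: p[1] - p[0]):
        runs.append([value for _, value in group])
    return runs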