Example 1
def main():
    """
    Build a one-row summary of the chosen scenario and write it to a CSV file.
    """

    # Create output dictionary
    # Headers: | scenario | one student | Betreuungsberufe | Detailhandelsberufe |
    assignment = []
    berufsgattung = const.scenarios['scenarios'][const.chosen_scenario]['berufsgattung']
    record = {
        'scenario': const.chosen_scenario,
        'one student': const.at_least_one_student,
        'Betreuungsberufe': berufsgattung['Betreuungsberufe']['number'],
        'Detailhandelsberufe': berufsgattung['Detailhandelsberufe']['number'],
    }
    assignment.append(record)

    # Convert the records to a DataFrame, whether the result is infeasible or optimal
    print()
    print('Writing CSV file')
    assignment_df = pd.DataFrame.from_records(assignment)
    assignment_df.set_index(['scenario'], inplace=True)
    output_name = 'output_{}_{}'.format('scenario', const.chosen_scenario)
    helper.write_to_csv(assignment_df, output_name)
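
The `helper.write_to_csv` implementation is not part of this example. A minimal sketch of what it might do for the DataFrame call above, assuming it only builds a file path and delegates to pandas (the output directory and the '.csv' suffix are assumptions):

import os

import pandas as pd


def write_to_csv(df, output_name, output_dir='.'):
    # Assumed behaviour: write the DataFrame, index included (the caller set
    # 'scenario' as the index), to '<output_dir>/<output_name>.csv'.
    path = os.path.join(output_dir, '{}.csv'.format(output_name))
    df.to_csv(path, index=True)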
Example 2

def test():
    # logging.basicConfig(level=logging.INFO)
    query = HKEX_API(from_date=n_yearsago(n=1), to_date=today())
    for data in query.data:
        try:
            url = data.file_link
            print(url)

            pdf = PDF(url)
            pdf_obj = pdf.pdf_obj
            f = AuditFee(pdf_obj)
            tab_sum = [table.summary for table in f.tables]
        except KeyboardInterrupt:
            break
        except Exception as e:
            result = {
                'table_summary': e,
                'ERROR': True,
                'url': url,
            }
            write_to_csv(result, 'result_3.csv')
            continue
        else:
            result = {
                'table_summary': list(filter(None, tab_sum)),
                'ERROR': None,
                'url': url,
            }
            write_to_csv(result, 'result_3.csv')
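
In this example `write_to_csv` receives a plain dict once per filing, so the helper presumably appends one row per call. A minimal sketch under that assumption (the real helper is not shown; the header is written only when the file is new):

import csv
import os


def write_to_csv(row, path):
    # Assumed behaviour: append `row` as one CSV line, emitting the header
    # only the first time the file is created.
    is_new = not os.path.exists(path)
    with open(path, 'a', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(row.keys()))
        if is_new:
            writer.writeheader()
        writer.writerow(row)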
Example 3

    def create_output(self):
        dict_of_variables = {
            'production_variables': self.production_variables,
            'inventory_variables': self.inventory_variables
        }

        output_df = write_outputs(dict_of_variables, attr='x')
        write_to_csv(output_df)
Example 4

    def create_output(self):
        dict_of_variables = {
            'production_variables': self.production_variables,
            'inventory_variables': self.inventory_variables
        }

        output_df = write_outputs_xpress(dict_of_variables, self.model)
        write_to_csv(output_df)
Example 5

# ================== Optimization ==================
if model_params['write_lp']:
    logger.info('Writing the lp file!')
    model.export_as_lp('./{}.lp'.format(model.name))

logger.info('Optimization starts!')

# If CPLEX is installed locally, we can use it to solve the problem.
# Otherwise, we can use DOcplexcloud; for a docloud solve, we need a valid 'url' and 'key'.
# Note that if the 'url' and 'key' parameters are present,
# the solve will run on DOcplexcloud even if CPLEX is available locally.
# I've provided more info on this in optimization_model_docplex.py.

# For now, a simple way to handle local or docloud solve
if model_params['cplex_cloud']:
    model.solve(url=model_params['url'], key=model_params['api_key'])
else:
    model.solve()

if model.solve_details.status == 'optimal':
    logger.info('The solution is optimal and the objective value '
                'is ${:,.2f}!'.format(model.objective_value))

# ================== Output ==================
dict_of_variables = {'production_variables': production_variables,
                     'inventory_variables': inventory_variables}

output_df = process_data.write_outputs(dict_of_variables, attr='solution_value')
helper.write_to_csv(output_df)
logger.info('Outputs are written to csv!')
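
`process_data.write_outputs` is not shown either. A plausible sketch, assuming each variable group is a dict keyed by index and that the requested attribute ('solution_value' on docplex variables, 'x' elsewhere) can be read off each variable with getattr:

import pandas as pd


def write_outputs(dict_of_variables, attr='solution_value'):
    # Flatten {group_name: {key: variable}} into long-format rows, reading
    # the requested attribute from each decision variable.
    rows = [{'variable_group': group, 'key': key, 'value': getattr(var, attr)}
            for group, variables in dict_of_variables.items()
            for key, var in variables.items()]
    return pd.DataFrame(rows)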
Example 6
def fit_model_by_condition(subj_idx=0,
                           n=1,
                           n_training_conditions=9,
                           test_conditions='all'):
    '''
    NB: This script can (and should) be run in parallel in several different Python consoles, one subject per console.
    subj_idx: 0 to 6 to obtain individual fits, or 'all' to fit to group-averaged data
    n: number of repeated fits per condition (n > 1 can be used to quickly check the robustness of model fitting)
    n_training_conditions: how many conditions are included in the training set (4, 8, or 9).
                           For 4, the training data for each condition (TTA, d) are the decisions where both TTA and d
                           differ from those of the current condition. For 8, all other conditions are included.
    test_conditions: 'all' to cross-validate the model on all nine conditions, or a list of dicts with the conditions to fit the model to
    '''
    modelTtaBounds = model_definitions.ModelTtaBounds(ndt='gaussian')
    exp_data = pd.read_csv('../data/measures.csv',
                           usecols=[
                               'subj_id', 'RT', 'is_turn_decision',
                               'tta_condition', 'd_condition'
                           ])
    subjects = exp_data.subj_id.unique()

    if test_conditions == 'all':
        conditions = [{'tta': tta, 'd': d}
                      for tta in exp_data.tta_condition.unique()
                      for d in exp_data.d_condition.unique()]
    else:
        conditions = test_conditions

    if subj_idx == 'all':
        subj_id = 'all'
        subj_data = exp_data
        loss = model_definitions.LossWLSVincent
    else:
        subj_id = subjects[subj_idx]
        subj_data = exp_data[(exp_data.subj_id == subj_id)]
        loss = model_definitions.LossWLS

    directory = ('../model_fit_results/full_data/' if n_training_conditions == 9
                 else '../model_fit_results/cross_validation_%i/' % n_training_conditions)

    file_name = 'subj_%s.csv' % (str(subj_id))
    if n > 1:
        helper.write_to_csv(directory, file_name,
                            ['subj_id', 'tta', 'd', 'n', 'loss'] +
                            modelTtaBounds.param_names)
    else:
        helper.write_to_csv(directory, file_name,
                            ['subj_id', 'tta', 'd', 'loss'] +
                            modelTtaBounds.param_names)

    for i in range(n):
        print('Training conditions: %i' % (n_training_conditions))
        print(subj_id)

        if n_training_conditions == 9:
            training_data = subj_data
            print('len(training_data): ' + str(len(training_data)))

            fitted_model = helper.fit_model(modelTtaBounds.model,
                                            training_data, loss)
            if n > 1:
                helper.write_to_csv(directory, file_name, [
                    subj_id, 'NA', 'NA', i,
                    fitted_model.get_fit_result().value()
                ] + fitted_model.get_model_parameters())
            else:
                helper.write_to_csv(directory, file_name, [
                    subj_id, 'NA', 'NA',
                    fitted_model.get_fit_result().value()
                ] + fitted_model.get_model_parameters())
        else:
            for condition in conditions:
                print(condition)
                if n_training_conditions == 8:
                    training_data = subj_data[(
                        ~(subj_data.tta_condition == condition['tta'])
                        | ~(subj_data.d_condition == condition['d']))]
                elif n_training_conditions == 4:
                    training_data = subj_data[(
                        ~(subj_data.tta_condition == condition['tta'])
                        & ~(subj_data.d_condition == condition['d']))]
                else:
                    raise ValueError(
                        'n_training_conditions should be one of [9, 8, 4]')
                print('len(training_data): ' + str(len(training_data)))
                fitted_model = helper.fit_model(modelTtaBounds.model,
                                                training_data, loss)
                if n > 1:
                    helper.write_to_csv(directory, file_name, [
                        subj_id, condition['tta'], condition['d'], i,
                        fitted_model.get_fit_result().value()
                    ] + fitted_model.get_model_parameters())
                else:
                    helper.write_to_csv(directory, file_name, [
                        subj_id, condition['tta'], condition['d'],
                        fitted_model.get_fit_result().value()
                    ] + fitted_model.get_model_parameters())
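
The two boolean masks above are the crux of the cross-validation split: with n_training_conditions == 8 only the exact (tta, d) cell is held out, whereas with 4 every row sharing either the tta or the d of the test cell is excluded. A toy check (the column names follow the script; the values are made up):

import pandas as pd

df = pd.DataFrame({'tta_condition': [4, 4, 5, 5],
                   'd_condition': [90, 120, 90, 120]})
condition = {'tta': 4, 'd': 90}

# n_training_conditions == 8: drop only the (4, 90) cell itself
mask_8 = ~(df.tta_condition == condition['tta']) | ~(df.d_condition == condition['d'])
# n_training_conditions == 4: drop every row with tta == 4 or d == 90
mask_4 = ~(df.tta_condition == condition['tta']) & ~(df.d_condition == condition['d'])

print(df[mask_8])  # three rows: (4, 120), (5, 90), (5, 120)
print(df[mask_4])  # one row: (5, 120)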
Example 7
    consecutive_pages = [tuple(li) for li in consecutive_int_list(unique(pages))]
    return consecutive_pages


if __name__ == "__main__":
    import get_pdf
    from test_cases import test_cases
    from hkex import get_data
    from helper import write_to_csv
    # logging.basicConfig(level=logging.INFO)
    for data in get_data():
        result = {}
        url = data.file_link
        csv = 'indpt_audit_report_2.csv'
        pdf_obj = get_pdf.byte_obj_from_url(url)
        try:
            py_pdf = get_pdf.by_pypdf(pdf_obj)
        except Exception:
            continue
        toc = _get_toc(py_pdf)
        pattern = r'^(?!.*internal)(?=.*report|responsibilities).*auditor.*$'
        pages = _get_page_by_outline(toc, pattern) or _get_page_by_page_title_search(get_pdf.by_pdfplumber(pdf_obj), pattern)
        result['result'] = pages
        result['toc'] = 'available' if toc else 'unavailable'
        result['url'] = url
        write_to_csv(result, csv)
        pdf_obj.close()
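
The lookahead pattern does the heavy lifting when scanning outline titles: it rejects anything mentioning 'internal' and requires 'auditor' plus 'report' (or a literal leading 'responsibilities', since the alternation binds the whole lookahead, not just the word). A quick check of the pattern as written, on lowercase titles:

import re

pattern = r'^(?!.*internal)(?=.*report|responsibilities).*auditor.*$'
titles = [
    "independent auditor's report",     # matches
    "report of the internal auditor",   # rejected: mentions 'internal'
    "auditors and their remuneration",  # rejected: no 'report'
    "responsibilities of the auditor",  # matches via the literal alternative
]
for title in titles:
    print(bool(re.search(pattern, title)), title)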