Python get_all_files Examples, core.h_file_handling.get_all_files Python Examples

Example #1

0

Show file

File: h_raw_processor.py Project: robertcalvertphd/core_irt

def create_FINAL_from_CULLED(project_path):
    """Write ``*_FINAL_f.csv`` and ``*_FINAL_c.csv`` limited to the culled items.

    Looks for a ``CULLED.csv`` file under ``<project_path>/reports`` and for
    ``CAL_f.csv`` / ``CAL_c.csv`` files under ``<project_path>/calibration``.
    Rows of the control frame and item columns of the response frame are kept
    only when their ``AccNum`` appears in the culled file, and both results
    are written back into the calibration folder.

    :param project_path: root folder of the project (string path).
    :return: ``False`` when the lookup does not yield exactly one file of
        each kind; otherwise ``None`` after both files are written.
    """
    reports_dir = project_path + '/reports'
    calibration_dir = project_path + '/calibration'
    culled_path = hfh.get_all_files(reports_dir, target_string='CULLED.csv')
    cal_f_path = hfh.get_all_files(calibration_dir, target_string='CAL_f.csv')
    cal_c_path = hfh.get_all_files(calibration_dir, target_string='CAL_c.csv')

    # The product of the three lengths is 1 only when every list has exactly
    # one entry.
    if len(culled_path) * len(cal_f_path) * len(cal_c_path) != 1:
        print("invalid call CULLED, CAL_f and CAL_c")
        return False

    f_df = hfh.get_df(cal_f_path[0])
    c_df = hfh.get_df(cal_c_path[0])
    culled_df = hfh.get_df(culled_path[0], header=0)

    # Keep the first column (IDs) aside, then turn the remaining columns into
    # rows so they can be filtered by AccNum.
    ids = f_df.iloc[:, 0]
    f_df = f_df.drop(columns=[0]).T.reset_index().drop(columns='index')
    c_df.columns = ['AccNum', 'B', 'C', 'D', 'E', 'F']
    f_df.insert(0, 'AccNum', c_df['AccNum'])

    kept_acc_nums = culled_df['AccNum']
    c_df = c_df[c_df['AccNum'].isin(kept_acc_nums)]
    f_df = f_df[f_df['AccNum'].isin(kept_acc_nums)]

    # Back to the original orientation, with the IDs as the first column.
    f_df = f_df.drop(columns=['AccNum']).T
    f_df.insert(0, 'ID', ids)

    # Drops the last six characters of the stem -- presumably the CAL suffix;
    # verify against the actual file naming.
    stem = hfh.get_stem(cal_f_path[0])[:-6]
    out_prefix = project_path + '/calibration/' + stem + '_FINAL_'
    f_df.to_csv(out_prefix + 'f.csv', header=False, index=False)
    c_df.to_csv(out_prefix + 'c.csv', header=False, index=False)

    print('hello')

Example #2

0

Show file

File: h_raw_processor.py Project: robertcalvertphd/Data_Manipulation

def get_confirm_on_pairing(raw_data, ui = True):
    """Ask the user to confirm the number of raw files found in ``raw_data``.

    Counts the csv, txt and xlsx files reported by ``hfh.get_all_files`` and,
    when ``ui`` is true, shows those counts in a yes/no prompt.

    :param raw_data: folder that is searched for the raw files.
    :param ui: when false no prompt is shown and the pairing is treated as
        confirmed.
    :return: ``1`` if confirmed (or ``ui`` is false), ``0`` otherwise.
    """
    pe_files = hfh.get_all_files(raw_data, extension='csv')
    data_files = hfh.get_all_files(raw_data, extension='txt')
    xlsx_files = hfh.get_all_files(raw_data, extension='xlsx')
    if not ui:
        return 1

    # The original message contained a stray literal " +  " before
    # "excel pro exam files" (a leftover from string concatenation).
    confirm_message = (
        "We found " + str(len(pe_files)) + " csv pro exam files, "
        + str(len(xlsx_files)) + " excel pro exam files, and "
        + str(len(data_files)) + " data files.\n"
        + "Is this the correct number for each?"
    )
    confirm = u.get_yes_no_response("Matching", confirm_message)
    return 1 if confirm else 0

Example #3

0

Show file

File: C_form.py Project: robertcalvertphd/Data_Manipulation

    def create_reports(self, remove_rtf = True):
        """Validate the form inputs and build the master report.

        The report button is disabled for 200 ms to avoid double clicks.
        When validation succeeds, ``h2p.create_master_file`` is run on
        ``<report_path>/<report_name>`` and, if requested, the rtf files in
        its ``xCalibreOutput`` folder are deleted afterwards.

        :param remove_rtf: delete the ``.rtf`` files found in the
            ``xCalibreOutput`` folder after the master file is created.
            Previously this flag was ignored and the files were always
            removed; the default keeps that behaviour.
        :return: the result of ``h2p.create_master_file`` on success, ``0``
            when the inputs are invalid.
        """
        print("report call")
        self.log.clear()
        self.b_report.config(state=tk.DISABLED)
        self.gui.after(200, lambda: self.b_report.config(state=tk.NORMAL))

        valid = self.pd_validate_inputs(xCalibre_required=True)
        parent_path = self.report_path.get()
        master_name = self.report_name.get()
        master_path = parent_path + "/" + master_name
        xCalibre_path = master_path + "/xCalibreOutput"
        if not valid:
            self.log.append("Invalid function call.")
            self.d_log()
            self.log.clear()
            return 0

        self.log.append("report creation successful \nreports:" + master_path + "/reports")
        results = h2p.create_master_file(master_folder=master_path)
        if remove_rtf:
            for file in hfh.get_all_files(xCalibre_path, extension='rtf'):
                os.remove(file)
        self.d_log()
        self.log.clear()
        return results

Example #4

0

Show file

File: h_raw_processor.py Project: robertcalvertphd/core_irt

def update_c_from_bank(project_path):
    """Overwrite the ``AccNum`` column of each ``_c.csv`` with the bank's values.

    Pairs every ``_c.csv`` file in ``<project_path>/processed_data`` with an
    ``.xlsx`` file in ``<project_path>/bank_files`` (via ``hfh.pair_files``)
    and rewrites the control file in place, without header or index.

    Assumes that the updated ``_c`` files hold a position instead of an
    AccNum in their first column.

    :param project_path: root folder of the project (string path).
    """
    control_columns = ['AccNum', 'Key', 'Options', 'Domain', 'Include', 'Type']
    c_files = hfh.get_all_files(project_path + '/processed_data',
                                target_string='_c.csv')
    b_files = hfh.get_all_files(project_path + '/bank_files',
                                target_string='.xlsx')

    for pair in hfh.pair_files(c_files, b_files):
        control_path, bank_path = pair[0], pair[1]
        control = hfh.get_df(control_path)
        bank = pd.read_excel(bank_path)

        control.columns = list(control_columns)
        control['AccNum'] = bank['AccNum']
        control = control[control_columns]

        control.to_csv(control_path, index=None, header=None)

Example #5

0

Show file

File: h_raw_processor.py Project: robertcalvertphd/Data_Manipulation

def process_raw_data(master_folder, raw_data):
    """Back up the raw form/data files and process every matched pair.

    Form files are the csv files in ``raw_data``; when their count differs
    from the number of txt data files, the xlsx files are used as forms
    instead. Every form and data file is copied into ``<master_folder>/data``
    with a ``_raw_backup_form`` / ``_raw_backup_data`` suffix, after which
    each pair returned by ``pair_files`` is handed to ``process_paired_files``.

    :param master_folder: project folder that receives the backups.
    :param raw_data: folder containing the raw form and data files.
    """
    form_files = hfh.get_all_files(raw_data, extension="csv")
    data_files = hfh.get_all_files(raw_data, extension='txt')
    if len(form_files) != len(data_files):
        #   assume that we are using xlsx files
        form_files = hfh.get_all_file_names_in_folder(raw_data, extension="xlsx")

    backup_dir = master_folder + "/data"
    backup_groups = (
        (form_files, '_raw_backup_form.'),
        (data_files, '_raw_backup_data.'),
    )
    for group, tag in backup_groups:
        for source in group:
            backup_name = hfh.get_stem(source) + tag + hfh.get_extension(source)
            hfh.copy_file_and_write_to_destination(source, backup_dir, modified_name=backup_name)

    paired_files = pair_files(form_files, data_files)
    if not paired_files:
        return
    for pair in paired_files:
        process_paired_files(pair[0], pair[1], master_folder)

Example #6

0

Show file

File: h_raw_processor.py Project: robertcalvertphd/core_irt

def create_forms_from_bank(project_path,
                           operational=True,
                           create_bank_L=False):
    """Create ``Position``/``AccNum`` form csv files from the bank workbooks.

    Every file in ``<project_path>/bank_files`` is read with
    ``pd.read_excel`` and converted into a two-column form that is written to
    ``<project_path>/forms/<prefix>/<stem><suffix>.csv``.

    :param project_path: root folder of the project (string path).
    :param operational: keep only rows whose ``UseCode`` is ``Operational``
        and write to the ``operational`` folder with an ``_LO`` suffix;
        otherwise write the full form to ``full`` with an ``_LF`` suffix.
    :param create_bank_L: also build a form for files whose path contains
        ``BANK`` (positions are generated as 1..n); such files are skipped
        when this is false.

    A file that matches none of the branches (e.g. ``operational`` is set but
    the sheet has no ``UseCode`` column) is reported and skipped. Previously
    ``form`` was not reset between files, so such a file either raised
    ``UnboundLocalError`` or silently re-wrote the previous file's form under
    the new name.
    """
    bank_files = hfh.get_all_files(project_path + '/bank_files')
    print("creating forms")
    for file in bank_files:
        # One membership test for both checks (the original mixed
        # ``find(...) > 0`` and ``find(...) > -1``).
        is_bank = "BANK" in file
        if is_bank and not create_bank_L:
            continue

        b_df = pd.read_excel(file, header=0)
        name = hfh.get_stem(file)
        cut = ""
        form = None  # reset per file so a stale form is never written
        if create_bank_L and is_bank:
            # check if passing is present
            # NOTE(review): the underscore index is taken from the full path
            # but applied to the stem, and the slice keeps the underscore, so
            # int() likely never succeeds. Left unchanged because the
            # intended file-name format is not visible here -- confirm.
            i = file.find('_')
            if i > -1:
                cut = name[i:]
                try:
                    cut = int(cut)
                except ValueError:
                    print(
                        file,
                        "contains and underscore but does not provide a cut"
                    )
            form = pd.DataFrame([])
            form['AccNum'] = b_df['AccNum']
            form.insert(0, 'Position', range(1, len(form) + 1))
        elif operational:
            if 'UseCode' in b_df.columns:
                form = b_df[b_df['UseCode'] == 'Operational']
                form = form[['Position', 'AccNum']]
        elif 'Position' in b_df.columns:
            form = b_df[['Position', 'AccNum']]

        if form is None:
            print(file, "does not contain the columns needed to create a form")
            continue

        suffix = '_LF'
        prefix = 'full'
        if operational:
            suffix = '_LO' + str(cut)
            prefix = 'operational'
        form.to_csv(project_path + '/forms/' + prefix + '/' + name +
                    suffix + '.csv',
                    index=False)

Example #7

0

Show file

File: h_raw_processor.py Project: robertcalvertphd/core_irt

def remove_accNum_from_f_and_c(accNum, name, program_path, reason=None):
    """Drop one item from a processed ``_f``/``_c`` pair and log the removal.

    The pair ``<name>_f.csv`` / ``<name>_c.csv`` is loaded from
    ``<program_path>/processed_data``, brought into strict format, stripped
    of ``accNum``, validated with ``strict_grade`` and written back to the
    same folder. A line describing the removal is then added to the removed
    items report in the reports folder (the report is created when missing).

    :param accNum: identifier of the item to remove.
    :param name: stem shared by the ``_f`` and ``_c`` files.
    :param program_path: root folder of the program (string path).
    :param reason: optional text appended to the log entry.
    """
    processed_folder = program_path + '/processed_data'
    # create backup_processed_data folder
    hfh.create_dir(program_path + '/' + E.BACKUP_PROCESSED_DATA_P)

    f_df = hfh.get_single_file(processed_folder,
                               target_string=name + '_f.csv',
                               as_df=True,
                               strict=True)
    c_file = hfh.get_single_file(processed_folder,
                                 target_string=name + '_c.csv',
                                 strict=True)
    c_df = hfh.get_df(c_file, header=get_header_argument(c_file))

    strict_frames = get_strict_format_f_df(c_df, f_df, get_c_df=True)
    # Drop by label on the transposed response frame, then transpose back.
    f_df = strict_frames[1].T.drop(accNum).T
    c_df = (strict_frames[0]
            .set_index(['AccNum'])
            .drop(accNum)
            .reset_index(drop=False))

    strict_grade(c_df, f_df, operational=False)  # solely for validation
    f_df.to_csv(processed_folder + '/' + name + '_f.csv',
                header=None,
                index=True)
    c_df.to_csv(processed_folder + '/' + name + '_c.csv',
                index=False,
                header=None)

    # create notation of removal with reason
    reports_dir = program_path + "/" + E.REPORTS_P + '/'
    existing_reports = hfh.get_all_files(reports_dir,
                                         target_string=E.REMOVED_ITEMS_R)

    entry = accNum + " was removed from " + name
    if reason is not None:
        entry += " because of a " + reason

    if len(existing_reports) > 0:
        hfh.add_lines_to_csv(existing_reports[0], [entry])
    else:
        hfh.write_lines_to_text([entry + '\n'],
                                reports_dir + E.REMOVED_ITEMS_R)

Example #8

0

Show file

def create_master_file(report_name = False, xCalibre_output_path = False, reports_path = False, master_folder = False):
    """Process the xCalibre ``Stats`` files of a project into master reports.

    :param report_name: title of the report; asked from the user when falsy.
    :param xCalibre_output_path: folder holding the xCalibre output; asked
        from the user when falsy.
    :param reports_path: folder that receives the generated reports; asked
        from the user when falsy.
    :param master_folder: project folder. When given, the three values above
        are derived from it (``<master_folder>/reports``,
        ``<master_folder>/xCalibreOutput`` and the folder's own name).
    :return: an ``R`` object built from ``function_id`` and the collected
        ``[constant, message]`` entries.

    Fixes relative to the previous version:

    * the title entered by the user is now stored in ``report_name``;
    * the folder-name check runs on a *valid* selection, not on an empty one;
    * messages concatenate ``str(...)`` of the right variable, so an empty or
      ``None`` selection no longer raises ``TypeError``;
    * the Stats lookup uses the folder the user selected, not the falsy
      ``xCalibre_output_path`` argument.
    """
    function_id = "h_2p_report_analysis|create_master_file"
    r_entries = []  # list of [constant, message] entries
    if master_folder:
        # confirm master folder does not end in /
        if master_folder[-1] == '/':
            master_folder = master_folder[:-1]
        reports_path = master_folder + "/reports"
        xCalibre_output_path = master_folder + "/xCalibreOutput"
        search_name = master_folder[:-1]
        report_name = master_folder[search_name.rfind('/')+1:]
    if not report_name:
        report_name = u.get_string("What would you like to title the report?")

    path = xCalibre_output_path
    if not path:
        path = u.get_folder("Locate the xCalibreOutput reports folder", required=False)
        if not path:
            r_entries.append([R.PATH_INVALID, "Invalid xCalibrePath:" + str(path)])
        elif hfh.get_parent_folder(path) != 'xCalibreOutput':
            r_entries.append([R.WRONG_FOLDER, " must select xCalibreOutput folder"])

    if not reports_path:
        reports_path = u.get_folder("Choose a folder for generated reports.", required=False)
        if not reports_path:
            r_entries.append([R.PATH_INVALID, "Invalid report path:" + str(reports_path)])
        # NOTE(review): the reports folder is compared against
        # "xCalibreOutput" exactly as before -- confirm this is intended.
        elif hfh.get_parent_folder(reports_path) != "xCalibreOutput":
            r_entries.append([R.WRONG_FOLDER, "Not xCalibreOutput"])

    # Only touch the file system when both folders were actually provided.
    if path and reports_path and os.path.isdir(reports_path):
        stats_files = hfh.get_all_files(path, "Stats")
        if len(stats_files) == 0:
            r_entries.append([R.NO_STATS_FILES, "xCalibreOutput reports:" + path])
        else:
            process_stats_files(stats_files, reports_path, report_name)
        if len(r_entries) == 0:
            r_entries.append([R.VALID, "create_master_file executed"])
    return R(function_id, r_entries)

Example #9

0

Show file

def has_acceptable_correct_percentage(xCalibre_report_path,
                                      id_length=8,
                                      debug=True):
    """Check that every ``Matrix`` file has at least 50 percent correct scores.

    For each file returned by ``hfh.get_all_files(..., "Matrix")`` the
    characters of every line after the first ``id_length`` ones (excluding
    the final character of the line) are treated as item scores. Characters
    that cannot be converted with ``int`` count towards the total but not
    towards the correct answers.

    :param xCalibre_report_path: folder searched for ``Matrix`` files.
    :param id_length: number of leading characters per line to skip.
    :param debug: print the percentage computed for each file.
    :return: ``False`` as soon as one file scores below 50 percent,
        ``True`` otherwise.

    A file that yields no scores at all is reported and skipped; it used to
    raise ``ZeroDivisionError``.
    """
    files = hfh.get_all_files(xCalibre_report_path, "Matrix")
    for file in files:
        total = 0
        correct = 0
        for line in hfh.get_lines(file):
            for x in line[id_length:-1]:
                total += 1
                try:
                    correct += int(x)
                except (TypeError, ValueError):
                    # non-numeric score: counted as not correct
                    pass
        if total == 0:
            print(file, "contains no scores.")
            continue
        percent_correct = round(correct / total * 100, 4)
        if debug:
            print(file, percent_correct)
        if percent_correct < 50:
            print(file, "has low correct rate.")
            return False
    return True

Example #10

0

Show file

File: h_raw_processor.py Project: robertcalvertphd/core_irt

def process_response_string_file(f_path,
                                 bank_path=None,
                                 destination_path=None,
                                 write_csv=False,
                                 get_df=True,
                                 create_c=True,
                                 paired_bank_xlsx=None):
    """Convert a raw response-string file into response/control data frames.

    The file's lines are offered to the ``is_type_*`` detectors in a fixed
    order (K, A, B, ..., J); the first match selects the ``process*``
    function. Types A, C, E and G return a ``(c_df, f_df)`` pair, the others
    return only ``f_df``. When no detector matches, the file is treated as
    already formatted and loaded with ``hfh.get_df``.

    :param f_path: path of the response-string file.
    :param bank_path: optional bank location used to build or replace the
        control frame.
    :param destination_path: folder for the ``_f.csv`` / ``_c.csv`` output;
        required when ``create_c`` is true.
    :param write_csv: write the resulting frames to ``destination_path``.
    :param get_df: return the response frame.
    :param create_c: build a control frame from a paired bank file when the
        raw file did not provide one.
    :param paired_bank_xlsx: unused in this function.
    :return: the response frame when ``get_df`` is true, otherwise ``None``.
    :raises AssertionError: when ``create_c`` is set without a destination,
        or when the file has no lines.
    """
    if create_c:
        assert destination_path is not None, "process response string needs to know where to put the processed data"
    name = hfh.get_stem(f_path)
    lines = hfh.get_lines(f_path)
    assert len(lines) > 0, "asked to process empty file:" + f_path

    # (detector, processor, processor returns (c_df, f_df)) -- the order is
    # significant: the first matching detector wins.
    handlers = (
        (is_type_K, processK, False),
        (is_type_A, processA, True),
        (is_type_B, processB, False),
        (is_type_C, processC, True),
        (is_type_D, processD, False),
        (is_type_E, processE, True),
        (is_type_F, processF, False),
        (is_type_G, processG, True),
        (is_type_H, processH, False),
        (is_type_I, processI, False),
        (is_type_J, processJ, False),
    )

    c_df = None
    f_df = None
    for detect, process, has_control in handlers:
        if detect(lines):
            processed_lines = process(lines)
            if has_control:
                c_df = processed_lines[0]
                f_df = processed_lines[1]
            else:
                f_df = processed_lines
            break
    else:
        print(f_path + " is already formatted")
        is_formatteed(lines)
        f_df = hfh.get_df(f_path)

    if c_df is not None and bank_path:
        # add AccNum instead of sequence
        b_df = create_c_df_from_bank(bank_path)
        b_df['Key'] = c_df['Key']
        c_df = b_df
    if c_df is None and bank_path is not None and create_c:
        #todo: consider respecting the correct answer at the time vs the bank or just destroy it
        bank_files = hfh.get_all_files(bank_path, extension='xlsx')
        pair = hfh.pair_files([f_path], bank_files)
        if len(pair) == 0:
            print(
                "could not find matching bank file and no default control information present."
            )
        if len(pair) == 1:
            # todo: may evaluate differences between bank and response string if desired
            c_df = create_c_df_from_bank(pair[0][1])
        if len(pair) > 1:
            print("more than one file matched for bank", f_path)

    #confirm_id_as_index
    # Use whichever label is actually present; the old code tested for 0 or
    # '0' but always indexed with 0, raising KeyError for a '0' column.
    for id_column in (0, '0'):
        if id_column in f_df.columns:
            f_df = f_df.set_index(f_df[id_column], drop=True)
            f_df = f_df.drop(columns=id_column)
            break
    if write_csv:
        #todo changed index... need to make sure all processed items spit out the same... in this case they are pre-strict.

        f_df.to_csv(destination_path + '/' + name + '_f.csv',
                    index=True,
                    header=False)
        if c_df is not None:
            c_df.to_csv(destination_path + '/' + name + '_c.csv',
                        index=None,
                        header=False)
    if get_df:
        return f_df

Example #11

0

Show file

File: h_raw_processor.py Project: robertcalvertphd/core_irt

def validate_raw_files(project_path):
    """Print how many bank files can be paired with raw response files.

    Files from ``<project_path>/bank_files`` are paired with files from
    ``<project_path>/raw_data`` using ``hfh.pair_files``.

    :param project_path: root folder of the project (string path, without a
        trailing slash).
    """
    bank_files = hfh.get_all_files(project_path + '/bank_files')
    # The separator was missing here, producing "<project_path>raw_data".
    response_strings = hfh.get_all_files(project_path + '/raw_data')
    pairs = hfh.pair_files(bank_files, response_strings)
    print("valid pairs of raw data = " + str(len(pairs)))

Example #12

0

Show file

File: C_form.py Project: robertcalvertphd/Data_Manipulation

    def pd_validate_inputs(self, raw_required = False, xCalibre_required = False):
        """Validate the paths and files selected in the form.

        Every problem found is appended to ``self.log``. On success
        "validated call" is logged and ``self.d_log()`` is called.

        :param raw_required: also validate the raw data folder (matching
            numbers of readable data/form files, required form columns, and
            a user confirmation of the pairing).
        :param xCalibre_required: also require an ``xCalibreOutput`` folder
            containing ``Stats`` files and writable report targets.
        :return: ``1`` when every check passes, ``0`` otherwise.
        """
        parent_path = self.report_path.get()
        raw_path = self.raw_data_path.get()
        valid = 1
        master_folder = parent_path
        if not os.path.isdir(parent_path):
            self.log.append("Path invalid:" + master_folder)
            valid = 0

        #   assume that parent path is a project folder e.g. ...Desktop/LEX
        # The getter has to be called: comparing the bound method itself to
        # "" was always False, so this branch could never run.
        if self.report_name.get() == "":
            report_path = parent_path + "/reports"
            xCalibre_path = parent_path + "/xCalibreOutput"
            master_name = master_folder[master_folder.rfind('/')+1:]

        else:
            master_name = self.report_name.get()
            master_folder = parent_path + "/" + master_name
            report_path = master_folder + "/reports"
            xCalibre_path = master_folder + "/xCalibreOutput"
        if raw_required and raw_path == "":
            self.log.append("raw path required")
            valid = 0

        if raw_required and valid:
            data_files = hfh.get_all_files(raw_path, extension='txt')
            form_files = hfh.get_all_files(raw_path, extension='csv')
            if len(form_files) == 0:
                #   assume just xlsx files
                form_files = hfh.get_all_file_names_in_folder(raw_path, extension='xlsx')

            if data_files is None or form_files is None:
                valid = 0
                if data_files is None:
                    self.log.append("data files are missing")
                if form_files is None:
                    self.log.append("form files are missing")
            if valid:
                if len(data_files) == 0 or len(form_files) == 0:
                    valid = 0
                    self.log.append("Raw data does not contain both txt and csv files.")
                    self.log.append("Raw Path:" + raw_path)
                if not len(data_files) == len(form_files):
                    valid = 0
                    self.log.append("There are unequal data and form files in raw data.")
                    self.log.append("Found " + str(len(data_files)) + " data files and " + str(len(form_files)) + " form files.")
                    # list both sets so the user can spot the odd one out
                    self.log.append("data:\n" + "".join(f + '\n' for f in data_files))
                    self.log.append("form:\n" + "".join(f + '\n' for f in form_files))
                    self.log.append("Raw Path:" + raw_path)

                for file in data_files:
                    can_read = hfh.file_is_readable(file)
                    if not can_read:
                        valid = 0
                        self.log.append("read access denied for data file:" + file)

                for file in form_files:
                    can_read = hfh.file_is_readable(file)
                    if not can_read:
                        valid = 0
                        self.log.append("read access denied for form file:" + file)
                    if valid:
                        if hfh.get_extension(file) == 'csv':
                            test_df = hfh.get_df(file,header=0)
                        else:
                            test_df = hfh.get_df_from_xlsx(file)
                        required_columns = ["Domain", "AccNum", "CorrectAnswer"]
                        for column in required_columns:
                            if column not in test_df.columns:
                                self.log.append("______")
                                self.log.append(file)
                                self.log.append("pro exam file does not contain " + column + ".\nReset form and then download from Proexam.")
                                self.log.append("______")
                                valid = 0

        if valid:
            master_report_path = master_folder + "/" + 'reports'
            if not os.path.isdir(master_report_path):
                self.log.append("Path does not contain reports folder. \nPath:" + master_folder)
                valid = 0

        if valid:
            if not os.path.isdir(xCalibre_path) and xCalibre_required:
                self.log.append("Path does not contain xCalibreOutput folder. \nPath:" + master_folder)
                valid = 0

        if valid:
            # NOTE(review): this tests the bare name relative to the current
            # working directory, not the master folder -- confirm intent.
            if os.path.isfile(master_name):
                self.log.append("Folder name is a file. It should be a directory, i.e. no extension.")
                valid = 0

        if valid and raw_required:
            if not hr.get_confirm_on_pairing(raw_path):
                valid = 0
                self.log.append("User said pairing was wrong.")

        if valid and xCalibre_required:
            stats_files = hfh.get_all_file_names_in_folder(xCalibre_path, target_string="Stats")
            if len(stats_files) == 0:
                self.log.append("Report path does not contain xCalibreOutput reports")
                valid = 0

            #   check that can write reports
            aggregate_name = report_path + "/" + master_name + "_aggregate_.csv"
            complete_name = report_path + "/" + master_name + "_complete_.csv"

            # only existing report files can be locked by another program
            for target in (aggregate_name, complete_name):
                if os.path.isfile(target) and not hfh.file_is_writable(target):
                    valid = 0
                    self.log.append("No access to " + target)

        if valid:
            self.log.append("validated call")
            self.d_log()
        return valid