def process_paired_files(pe_file, data_file, project_folder, select_Domain=False):
    """Write the control file for a form and convert its paired data file.

    The form file is loaded (via ``hfh.get_df_from_xlsx`` for ``xlsx``,
    otherwise ``hfh.get_df``), ordered by its ``Position`` column, and
    reduced to a headerless six-column control file
    (AccNum, CorrectAnswer, number_of_options, group, include, type) written
    to ``<project_folder>/processed_data/<stem of pe_file>_c.csv``.
    The data file is then handed to ``h.convert_default_data_to_iteman``.

    Args:
        pe_file: path of the form file; must provide the columns
            ``Position``, ``AccNum``, ``CorrectAnswer`` and ``Domain``.
        data_file: path of the response data file paired with ``pe_file``.
        project_folder: project root; output goes to its ``processed_data``
            sub-folder.
        select_Domain: when truthy, only items whose ``Domain`` equals this
            value keep ``include == 'Y'``; all other items are written with
            ``include == 'N'``.

    Returns:
        1 once both outputs have been produced.
    """
    #   todo: add in domain processing here so that sets without domain names are still separated
    if hfh.get_extension(pe_file) == 'xlsx':
        pe_df = hfh.get_df_from_xlsx(pe_file)
    else:
        pe_df = hfh.get_df(pe_file, header=0)

    # Position may be read as text; sort numerically rather than lexically.
    pe_df["Position"] = pe_df["Position"].astype(float)
    pe_df = pe_df.sort_values("Position", ascending=True)

    # Work on an independent copy so the column assignments below are not
    # made against a slice of the loaded frame.
    pe_df = pe_df[["AccNum", "CorrectAnswer", "Domain"]].copy()

    pe_df["number_of_options"] = 4
    pe_df["group"] = 1
    pe_df["type"] = "M"

    # Default every item to included FIRST, then switch off the items outside
    # the requested domain. (Previously the blanket 'Y' assignment ran after
    # the domain filter and silently undid it.)
    pe_df["include"] = "Y"
    if select_Domain:
        pe_df.loc[pe_df["Domain"] != select_Domain, "include"] = "N"

    # Selecting the output columns also drops Domain, which is not part of
    # the control file.
    pe_df = pe_df[["AccNum", "CorrectAnswer", "number_of_options", "group", "include", "type"]]

    processed_path = project_folder + "/processed_data/"
    c_path = processed_path + hfh.get_stem(pe_file) + "_c.csv"
    pe_df.to_csv(c_path, header=False, index=False)
    h.convert_default_data_to_iteman(data_file, processed_path, new_name=hfh.get_stem(pe_file))
    return 1
def process_raw_data(master_folder, raw_data):
    """Back up the raw form and data files, then process every matched pair.

    Form files are looked up as ``csv`` and data files as ``txt`` inside
    ``raw_data``. When the two counts differ, the form files are looked up
    again as ``xlsx``. Each file is copied into ``<master_folder>/data``
    under a ``_raw_backup_form`` / ``_raw_backup_data`` name before the
    pairs returned by ``pair_files`` are passed to ``process_paired_files``.

    Args:
        master_folder: project folder that receives the backups and outputs.
        raw_data: folder holding the raw form and data files.
    """
    form_files = hfh.get_all_files(raw_data, extension="csv")
    data_files = hfh.get_all_files(raw_data, extension='txt')
    if len(form_files) != len(data_files):
        #   assume that we are using xlsx files
        form_files = hfh.get_all_file_names_in_folder(raw_data, extension="xlsx")

    # Forms are backed up first, then data files; only the name tag differs.
    backup_plan = (
        (form_files, '_raw_backup_form.'),
        (data_files, '_raw_backup_data.'),
    )
    for sources, tag in backup_plan:
        for source in sources:
            backup_name = hfh.get_stem(source) + tag + hfh.get_extension(source)
            hfh.copy_file_and_write_to_destination(
                source, master_folder + "/data", modified_name=backup_name)

    paired_files = pair_files(form_files, data_files)
    if not paired_files:
        return
    for pair in paired_files:
        # pair[0] is the form file, pair[1] the matching data file.
        process_paired_files(pair[0], pair[1], master_folder)
Ejemplo n.º 3
0
def CAB_processor(project_files, tuples_start_operational_and_start_pretest,
                  bank_path):
    """Validate the arguments and load every project file (work in progress).

    Announces that the processor is unfinished, checks the arguments with
    assertions, runs ``process_response_string_file`` on each project file,
    and prints "hello". Nothing is returned and the loaded frames are not
    used further yet.

    Args:
        project_files: list of response-string file paths.
        tuples_start_operational_and_start_pretest: list with one entry per
            project file (the assertion message describes it as a list of
            tuples).
        bank_path: path whose extension must be ``xlsx``.

    Raises:
        AssertionError: if either list argument is not exactly a ``list``,
            the two lists differ in length, or ``bank_path`` is not ``xlsx``.
    """
    print("CAB_processor is not implemented yet")

    starts = tuples_start_operational_and_start_pretest
    assert type(project_files) is list, "project files must be a list"
    assert type(starts) is list, 'tuples_start... is a list of tuples'
    assert len(project_files) == len(starts), "list lengths must match"

    bank_extension = hfh.get_extension(bank_path)
    assert bank_extension == 'xlsx', "bank path must lead to a a.xlsx file"

    # Collected for later use; nothing consumes them yet.
    dfs = [process_response_string_file(project_file)
           for project_file in project_files]
    print("hello")
Ejemplo n.º 4
0
def is_valid_name(path, harsh=False):
    """Return the file stem when ``path`` is acceptably named, else ``False``.

    A path is rejected unless its extension is ``txt`` or ``csv`` and both
    ``hfh.find_month`` and ``hfh.find_year`` report a value for its stem.

    With ``harsh`` false that is sufficient. With ``harsh`` true the stem
    must additionally equal ``<tag><year><month>`` (``tag`` being whatever
    precedes the year), optionally followed by two extra trailing
    characters, in which case "AB form detected" is printed.

    Args:
        path: file path to check.
        harsh: enforce the exact ``<tag><year><month>`` layout.

    Returns:
        The stem of ``path`` when valid, otherwise ``False``.
    """
    if hfh.get_extension(path) not in ('txt', 'csv'):
        return False

    name = hfh.get_stem(path)
    month = hfh.find_month(name)
    year = hfh.find_year(name)
    if not (month and year):
        return False

    # Rebuild the canonical name from the text before the year, the year and
    # the month.
    year_start = name.find(year)
    full_name = name[:year_start] + year + month

    if not harsh:
        return name
    if full_name == name:
        return name
    # Only reached in harsh mode when the stem is not an exact match: allow
    # two trailing characters after the canonical name.
    if full_name == name[:-2]:
        print("AB form detected")
        return name
    return False
Ejemplo n.º 5
0
    def pd_validate_inputs(self, raw_required=False, xCalibre_required=False):
        """Check that the configured paths are usable before a run.

        Reads ``self.report_path``, ``self.raw_data_path`` and
        ``self.report_name`` (each through ``.get()``), appends a message to
        ``self.log`` for every problem found, and calls ``self.d_log()`` when
        everything passed.

        When the report name is empty the report path itself is treated as
        the project folder; otherwise the project folder is
        ``<report path>/<report name>``.

        Args:
            raw_required: also validate the raw data folder: matching counts
                of data (txt) and form (csv, else xlsx) files, read access,
                required form columns, and user confirmation of the pairing.
            xCalibre_required: also require an ``xCalibreOutput`` folder
                containing "Stats" files and write access to any existing
                aggregate/complete report files.

        Returns:
            1 when every requested check passed, otherwise 0.
        """
        parent_path = self.report_path.get()
        raw_path = self.raw_data_path.get()
        report_name = self.report_name.get()
        valid = 1
        master_folder = parent_path
        if not os.path.isdir(parent_path):
            self.log.append("Path invalid:" + master_folder)
            valid = 0

        #   assume that parent path is a project folder e.g. ...Desktop/LEX
        # The name must be read with .get(); comparing the bound method
        # itself to "" is always False and made this branch unreachable.
        if report_name == "":
            report_path = parent_path + "/reports"
            xCalibre_path = parent_path + "/xCalibreOutput"
            # Last path component, tolerant of a trailing separator.
            master_name = os.path.basename(os.path.normpath(parent_path))
        else:
            master_name = report_name
            master_folder = parent_path + "/" + master_name
            report_path = master_folder + "/reports"
            xCalibre_path = master_folder + "/xCalibreOutput"

        if raw_required and raw_path == "":
            self.log.append("raw path required")
            valid = 0

        if raw_required and valid:
            data_files = hfh.get_all_files(raw_path, extension='txt')
            form_files = hfh.get_all_files(raw_path, extension='csv')
            # Guard against None before calling len(); None is reported below.
            if form_files is not None and len(form_files) == 0:
                #   assume just xlsx files
                form_files = hfh.get_all_file_names_in_folder(raw_path, extension='xlsx')

            if data_files is None or form_files is None:
                valid = 0
                if data_files is None:
                    self.log.append("data files are missing")
                if form_files is None:
                    self.log.append("form files are missing")
            if valid:
                if len(data_files) == 0 or len(form_files) == 0:
                    valid = 0
                    self.log.append("Raw data does not contain both txt and csv files.")
                    self.log.append("Raw Path:" + raw_path)
                if len(data_files) != len(form_files):
                    valid = 0
                    self.log.append("There are unequal data and form files in raw data.")
                    self.log.append("Found " + str(len(data_files)) + " data files and " + str(len(form_files)) + " form files.")
                    # List both sets so the user can spot the odd one out.
                    self.log.append("data:\n" + "".join(f + '\n' for f in data_files))
                    self.log.append("form:\n" + "".join(f + '\n' for f in form_files))
                    self.log.append("Raw Path:" + raw_path)

                for file in data_files:
                    if not hfh.file_is_readable(file):
                        valid = 0
                        self.log.append("read access denied for data file:" + file)

                required_columns = ["Domain", "AccNum", "CorrectAnswer"]
                for file in form_files:
                    if not hfh.file_is_readable(file):
                        valid = 0
                        self.log.append("read access denied for form file:" + file)
                    # Column checks are skipped once any earlier check failed.
                    if valid:
                        if hfh.get_extension(file) == 'csv':
                            test_df = hfh.get_df(file, header=0)
                        else:
                            test_df = hfh.get_df_from_xlsx(file)
                        for column in required_columns:
                            if column not in test_df.columns:
                                self.log.append("______")
                                self.log.append(file)
                                self.log.append("pro exam file does not contain " + column + ".\nReset form and then download from Proexam.")
                                self.log.append("______")
                                valid = 0

        if valid:
            master_report_path = master_folder + "/" + 'reports'
            if not os.path.isdir(master_report_path):
                self.log.append("Path does not contain reports folder. \nPath:" + master_folder)
                valid = 0

        if valid:
            if not os.path.isdir(xCalibre_path) and xCalibre_required:
                self.log.append("Path does not contain xCalibreOutput folder. \nPath:" + master_folder)
                valid = 0

        if valid:
            # NOTE(review): master_name is a bare name, so this is resolved
            # against the current working directory -- confirm that is intended.
            if os.path.isfile(master_name):
                self.log.append("Folder name is a file. It should be a directory, i.e. no extension.")
                valid = 0

        if valid and raw_required:
            if not hr.get_confirm_on_pairing(raw_path):
                valid = 0
                self.log.append("User said pairing was wrong.")

        if valid and xCalibre_required:
            stats_files = hfh.get_all_file_names_in_folder(xCalibre_path, target_string="Stats")
            if len(stats_files) == 0:
                self.log.append("Report path does not contain xCalibreOutput reports")
                valid = 0

            #   check that can write reports
            aggregate_name = report_path + "/" + master_name + "_aggregate_.csv"
            complete_name = report_path + "/" + master_name + "_complete_.csv"

            # Only existing files are checked for write access.
            for existing_report in (aggregate_name, complete_name):
                if os.path.isfile(existing_report):
                    if not hfh.file_is_writable(existing_report):
                        valid = 0
                        self.log.append("No access to " + existing_report)

        if valid:
            self.log.append("validated call")
            self.d_log()
        return valid