Code example #1
import os
import re
from detect_delimiter import detect

def checkDelimiter(filename, directory):
    """
    Checks the delimiter of a file or takes the input from the user.
    """
    delim = None  # stays None when no data line is found
    with open(os.path.join(directory, filename)) as f:
        for line in f:
            if re.match(r"^\d+.*$", line):  # first line starting with a digit
                delim = detect(line)
                break
    if delim is None:
        delim = input("Type of delimiter\n[1] ('\\t')\n[2] (' ')\n"
                      "[3] (';')\n[4] (',')\n[5] Custom delimiter\n"
                      "Chosen option: ")
        delim = int(delim)
        if delim == 1:
            delim = '\t'  # was '\\t', i.e. a literal backslash-t rather than a tab
        elif delim == 2:
            delim = ' '
        elif delim == 3:
            delim = ';'
        elif delim == 4:
            delim = ','
        elif delim == 5:
            delim = input("Enter custom delimiter: ")
    cls()  # console-clearing helper defined elsewhere in the project
    return delim
Code example #2
import logging
import detect_delimiter

log = logging.getLogger(__name__)  # assumed; the module's logger setup is not shown

def get_file_delimiter(arg_file):
    with open(arg_file, 'rb') as file:
        # decode explicitly: str() on bytes keeps the b'...' repr,
        # turning a real tab into the two characters '\' and 't'
        first_line = file.readline().strip().decode()
    delimiter = detect_delimiter.detect(first_line)  # str() wrapper dropped: it turned None into 'None'
    log.info('Auto-detection of delimiter for text-type file : [{0}]'.format(
        delimiter))
    return delimiter
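The pitfall the decode avoids is easy to reproduce in a REPL (a sketch): str() on a bytes line keeps the b'...' wrapper and escapes the tab, so detect would never see a real tab character.

>>> str(b'col1\tcol2')
"b'col1\\tcol2'"
>>> b'col1\tcol2'.decode()
'col1\tcol2'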
Code example #3
from detect_delimiter import detect

def retry_with_detected_sep(func):  # enclosing decorator added for context; its name is assumed
    def wrapper(*args, **kwargs):
        try:
            fun_res = func(*args, **kwargs)
        except AssertionError as e:
            # retry once with the delimiter detected from the assertion message
            fun_res = func(*args, **kwargs, sep=detect(e.args[0]))
        return fun_res
    return wrapper
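A usage sketch for the decorator above (load_table, the single-column check, and the decorator name are illustrative, not from the original project): a loader signals a wrong delimiter by raising AssertionError whose message is a sample line, and the wrapper retries with the detected separator.

import pandas as pd

@retry_with_detected_sep
def load_table(path, sep=','):
    df = pd.read_csv(path, sep=sep)
    with open(path) as f:
        assert df.shape[1] > 1, f.readline()  # the message becomes e.args[0]
    return df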
Code example #4
from detect_delimiter import detect

def get_delimiter(path):
    with open(path) as csv_file:
        first_line = csv_file.readline()
        if first_line:
            delimiter = detect(first_line)
            if delimiter:
                return delimiter
    raise ValueError("Can't detect the delimiter")
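A typical call site for this helper (a sketch, not from the original source) feeds the detected delimiter straight to csv.reader:

import csv

def read_rows(path):
    with open(path, newline='') as f:
        return list(csv.reader(f, delimiter=get_delimiter(path)))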
Code example #5
File: utils.py Project: Senejohnny/DESA_App
from detect_delimiter import detect

def detectdelimiter(file):
    """
    Detects the common delimiters [',', ';', ':', '|', '\t'].
    The input is an _io.StringIO object.
    """
    first_line = file.getvalue().split('\n')[0]  # first line of the StringIO object
    return detect(first_line)
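Since this variant takes an in-memory buffer rather than a path, a minimal usage sketch:

import io

buffer = io.StringIO("a;b;c\n1;2;3\n")
print(detectdelimiter(buffer))  # ';'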
Code example #6
def detectdelimiter(file):
    """
    Detects the common delimiters [',', ';', ':', '|', '\t'] in a file on disk.
    """
    from detect_delimiter import detect
    with open(file, newline='') as f:
        # read the raw first line; the original passed it through csv.reader,
        # which already splits on commas and so hid a comma delimiter
        first_line = f.readline()
    return detect(first_line)
Code example #7
import detect_delimiter  # test_field_mode is a project helper defined elsewhere

def text_to_dict(text):
    mydict = {}
    delim = detect_delimiter.detect(text[0], whitelist=[':', ' '])
    if delim is None:
        raise ValueError('Cannot detect delimiter.')
    for line in text:
        if line.strip() != '':
            line_split = line.split(sep=delim, maxsplit=1)
            key = line_split[0].strip()
            value = line_split[1].strip()

            # size fields: strip the formatting from byte counts
            field_mode = 'field_mode__size'
            field_test_results = test_field_mode(field_mode, key)
            if field_test_results[0] == field_mode:
                key = field_test_results[1]
                value = value.replace(' ', '') \
                             .replace(',', '') \
                             .replace('bytes', '')

            # the checksum/hash fields all get the same normalisation,
            # so the four copy-pasted blocks collapse into one loop
            for field_mode in ('field_mode__crc32', 'field_mode__md5',
                               'field_mode__sha1', 'field_mode__sha256'):
                field_test_results = test_field_mode(field_mode, key)
                if field_test_results[0] == field_mode:
                    key = field_test_results[1]
                    value = value.upper().replace(' ', '')

            mydict[key] = value
    return mydict
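A usage sketch (the input lines and the test_field_mode stub are illustrative; the real helper lives elsewhere in the project):

def test_field_mode(mode, key):
    # stub for illustration: match the crc32 field mode when the key says CRC32
    if mode == 'field_mode__crc32' and key.lower() == 'crc32':
        return (mode, 'CRC32')
    return (None, key)

text = [
    "Name:example.bin",
    "CRC32:1a2b 3c4d",
]
print(text_to_dict(text))  # {'Name': 'example.bin', 'CRC32': '1A2B3C4D'}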
Code example #8
    def bulk_host_popup(self):
        file = npyscreen.selectFile()
        try:
            with open(file, 'r') as f:
                content = f.read()
                content = content.replace('\r\n', '\n')
                from detect_delimiter import detect
                delimiter = detect(content, whitelist=['\n', ','])
                ip_addresses = content.split(delimiter)
                current_value = self.add_hosts_ip_addresses.get_value()
                # guard the empty case: current_value[-1] raised IndexError on an empty field
                needs_comma = bool(current_value) and current_value[-1] != ','
                self.add_hosts_ip_addresses.set_value(
                    f"{current_value}{',' if needs_comma else ''}{','.join(filter(None, ip_addresses))}")

        except FileNotFoundError:
            npyscreen.notify_confirm("File was not found!", title="Information Missing/Incorrect", wide=True, editw=1)
        except PermissionError:
            npyscreen.notify_confirm("You do not have permissions to open this file!", title="Information Missing/Incorrect", wide=True, editw=1)
        except UnicodeDecodeError:
            npyscreen.notify_confirm("We are unable to open the file. Make sure the file is human readable", title="Information Missing/Incorrect", wide=True, editw=1)
        except Exception as e:
            npyscreen.notify_confirm(f"Something went wrong, please retry, or select a different file.\nStack Trace:\n{str(e)}",
                                     title="Equivalent of a 500 error", wide=True, editw=1)
        self.DISPLAY()
Code example #9
def genRawPreview(file_path, file_name, user_id):
    try:
        with open(os.path.join(file_path, file_name), 'r', encoding='utf-8') as file:
            # read the header line; iterating over `file` below continues from the data rows
            str_filehead = file.readline()
            data_list = []
            target_directory = UPLOAD_FOLDER + user_id + '\\'

            # os.path.splitext already returns (name, extension)
            file_name_only, file_type = os.path.splitext(file_name)
            tgt_filename = file_name_only + '.json'
            tgt_filePath = target_directory

            deli = detect(str_filehead, whitelist=[',', ';', ':', '|', '\t'])
            list_h = str_filehead.split(deli)

            allowed_header = ['email', 'phone', 'name', 'currency',
                              'merchant_id', 'ID', 'EMAIL']
            h_flag = common_member(list_h, allowed_header)

            rows = [[str(x) for x in line.split(deli)] for line in file]

            if not os.path.exists(target_directory):
                os.makedirs(target_directory)

            if h_flag:
                for row in [tuple(x) for x in rows]:
                    save_data = OrderedDict()
                    for i, r in enumerate(row):
                        save_data[list_h[i]] = r
                    data_list.append(save_data)
            else:
                # no recognised header: re-read the file so the first line is kept as data
                with open(os.path.join(file_path, file_name), 'r', encoding='utf-8') as file:
                    rows = [[str(x) for x in line.split(deli)] for line in file]
                    for row in [tuple(x) for x in rows]:
                        save_data = OrderedDict()
                        for i, r in enumerate(row):
                            save_data['col' + str(i)] = r
                        data_list.append(save_data)

            with open(os.path.join(tgt_filePath, tgt_filename), 'w') as json_file:
                json.dump(data_list, json_file)
                return {'result': 'File successfully uploaded'}

    except Exception as e:
        return {'error': str(e)}
Code example #10
def main():
    """
    Main is responsible for the visualisation of everything connected with streamlit.
    It is the web application itself.
    """

    # # Radiobuttons in one row
    # st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)

    # Sets sidebar's header and logo
    sidebar.sidebar_head()

    #
    # # Spectrometer type - BWTek / Renishaw / Witec / Wasatch / Teledyne
    #

    spectra_types = [
        'EMPTY', 'BWTEK', 'RENI', 'WITEC', 'WASATCH', 'TELEDYNE', 'JOBIN'
    ]
    spectrometer = st.sidebar.selectbox("Choose spectra type",
                                        spectra_types,
                                        format_func=LABELS.get,
                                        index=0)

    # sidebar separating line
    sidebar.print_widgets_separator()

    # User data loader
    # sidebar.print_widget_labels('Upload your data or try with ours', 10, 0)

    files = st.sidebar.file_uploader(label='Upload your data or try with ours',
                                     accept_multiple_files=True,
                                     type=['txt', 'csv'])

    # Allow example data loading when no custom data are loaded
    if not files:
        if st.sidebar.checkbox("Load example data"):
            if spectrometer == "EMPTY":
                st.sidebar.error('First choose spectra type')
            else:
                files = utils.load_example_files(spectrometer)

    # Check if data loaded, if yes, perform actions
    delim = None
    if files:
        st.spinner('Uploading data in progress')
        # sidebar separating line
        sidebar.print_widgets_separator()

        from detect_delimiter import detect
        new_files = []
        for file in files:
            file.seek(0)
            lines = file.readlines()

            try:
                lines = [line.decode('utf-8') for line in lines]
            except AttributeError:
                pass

            # lines = str.splitlines(str(text))  # .split('\n')
            first_lines = '\n'.join(lines[:20])

            delim = detect(first_lines)
            colnum = lines[-2].count(delim)

            lines = [i for i in lines if i.count(delim) == colnum]
            text = '\n'.join(lines)
            buffer = io.StringIO(text)
            buffer.name = file.name
            new_files.append(buffer)

        try:
            df = save_read.read_files(spectrometer, new_files, delim)
        except (TypeError, ValueError):
            st.error('Try choosing another type of spectra')
            st.stop()

        main_expander = st.beta_expander("Customize your chart")
        # Choose plot colors and templates
        with main_expander:
            plots_color, template = vis_utils.get_chart_vis_properties()

        # Select chart type
        chart_type = vis_opt.vis_options()

        # sidebar separating line
        sidebar.print_widgets_separator()

        # Select data conversion type
        spectra_conversion_type = vis_opt.convertion_opt()

        # TODO need improvements
        # getting rid of duplicated columns
        df = df.loc[:, ~df.columns.duplicated()]

        #
        # # data manipulation - raw / optimization / normalization
        #

        # TODO delete if not needed
        # Normalization
        # if spectra_conversion_type == LABELS["NORM"]:
        #     df = (df - df.min()) / (df.max() - df.min())

        # Mean Spectra
        if chart_type == 'MS':
            df = df.mean(axis=1).rename('Average').to_frame()

        # columns in main view. Chart, expanders
        # TODO: handle this more cleverly
        normalized = False
        col_left, col_right = st.beta_columns([5, 2])
        if spectra_conversion_type != "RAW":
            col_right = col_right.beta_expander("Customize spectra",
                                                expanded=False)
            with col_right:
                vals = data_customisation.get_deg_win(chart_type,
                                                      spectra_conversion_type,
                                                      df.columns)
                if st.checkbox("Data Normalization"):
                    normalized = True
                    df = (df - df.min()) / (df.max() - df.min())
                else:
                    normalized = False

        # For grouped spectra sometimes we want to shift the spectra from each other, here it is:
        with main_expander:
            # TODO the code below needed?
            # trick to better fit sliders in expander
            # _, main_expander_column, _ = st.beta_columns([1, 38, 1])
            # with main_expander_column:

            shift_col, _, trim_col = st.beta_columns([5, 1, 5])
            with shift_col:
                if chart_type == 'GS':
                    shift = data_customisation.separate_spectra(normalized)
                elif chart_type == 'SINGLE':
                    col = st.selectbox('spectrum to plot', df.columns)
                    df = df[[col]]
                else:
                    shift = None
            with trim_col:
                df = vis_utils.trim_spectra(df)

        # data conversion end
        if spectra_conversion_type in {'OPT'}:
            baselines = pd.DataFrame(index=df.index)
            baselined = pd.DataFrame(index=df.index)
            flattened = pd.DataFrame(index=df.index)
            for col in df.columns:
                baselines[col] = peakutils.baseline(df[col], vals[col][0])
                baselined[col] = df[col] - baselines[col]
                flattened[col] = baselined[col].rolling(window=vals[col][1],
                                                        min_periods=1,
                                                        center=True).mean()

        #
        # # Plotting
        #

        # Groupped spectra
        if chart_type == 'GS':
            shifters = [(i + 1) * shift for i in range(len(df.columns))]
            plot_df = df if spectra_conversion_type == 'RAW' else flattened
            plot_df = plot_df + shifters

            figs = [
                px.line(plot_df,
                        x=plot_df.index,
                        y=plot_df.columns,
                        color_discrete_sequence=plots_color)
            ]

        # Mean spectra
        elif chart_type == 'MS':
            if spectra_conversion_type == 'RAW':
                plot_df = df
                figs = [
                    px.line(plot_df,
                            x=plot_df.index,
                            y=plot_df.columns,
                            color_discrete_sequence=plots_color)
                ]

            elif spectra_conversion_type in {'OPT'}:
                columns = [
                    'Average', 'Baseline', 'BL-Corrected',
                    'Flattened + BL-Corrected'
                ]
                plot_df = pd.concat([df, baselines, baselined, flattened],
                                    axis=1)
                plot_df.columns = columns

                fig1 = px.line(plot_df,
                               x=plot_df.index,
                               y=columns[-1],
                               color_discrete_sequence=plots_color[3:])
                fig2 = px.line(plot_df,
                               x=plot_df.index,
                               y=plot_df.columns,
                               color_discrete_sequence=plots_color)
                figs = [(fig1, fig2)]
            else:
                raise ValueError(
                    'Unknown conversion type for Mean spectrum chart')
        # 3D spectra
        elif chart_type == 'P3D':
            plot_df = flattened if spectra_conversion_type in {"OPT"} else df

            plot_df = plot_df.reset_index().melt('Raman Shift',
                                                 plot_df.columns)
            fig = px.line_3d(plot_df,
                             x='variable',
                             y='Raman Shift',
                             z='value',
                             color='variable')

            camera = dict(eye=dict(x=1.9, y=0.15, z=0.2))
            fig.update_layout(
                scene_camera=camera,
                width=1200,
                height=1200,
                margin=dict(l=1, r=1, t=30, b=1),
            )
            figs = [fig]

        # Single spectra
        elif chart_type == 'SINGLE':
            if spectra_conversion_type == 'RAW':
                plot_df = df
                figs = [
                    px.line(plot_df[col], color_discrete_sequence=plots_color)
                    for col in plot_df.columns
                ]
            else:
                columns = [
                    'Average', 'Baseline', 'BL-Corrected',
                    'Flattened + BL-Corrected'
                ]
                figs = []

                plot_df = pd.concat([df, baselines, baselined, flattened],
                                    axis=1)
                plot_df.columns = columns

                fig1 = px.line(plot_df,
                               x=plot_df.index,
                               y=columns[-1],
                               color_discrete_sequence=plots_color[3:]
                               )  # trick for color consistency
                fig2 = px.line(plot_df,
                               x=plot_df.index,
                               y=plot_df.columns,
                               color_discrete_sequence=plots_color)
                fig_tup = (fig1, fig2)
                figs.append(fig_tup)
        else:
            raise ValueError("Something unbelievable has been chosen")

        with col_left:
            charts.show_charts(figs, plots_color, template)

        with col_left:
            st.markdown('')
            link = utils.download_button(plot_df.reset_index(),
                                         'spectrum.csv',
                                         button_text='Download CSV')
            st.markdown(link, unsafe_allow_html=True)

    else:
        manual.show_manual()

    authors.show_developers()
Code example #11
        assert force, "{} already exists. Use the -f option to overwrite it".format(output_folder)
        shutil.rmtree(output_folder)
    os.mkdir(output_folder)

    # Fix delimiter if not provided or file type has been given
    if delimiter == "csv":
        delimiter = ","
    elif delimiter == "tsv":
        delimiter = "\t"
    
    if delimiter is None:
        print("Inferring delimiter from input data")
        with open(input_file) as f:
            data = [f.readline() for i in range(50)]
            data = "".join([x for x in data if x])  # lines already end in '\n'
            delimiter = detect(data)
            print(f"Delimiter is '{delimiter}'")

    # Import data
    print("Loading data from disk")
    input_data = None
    with open(input_file) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=delimiter)
        next(csv_reader, None)
        input_data = list(csv_reader)

    print("Parsing and checking consistency")
    data = []
    for i, line in enumerate(input_data):
        assert len(line) == 3, f"Line {i} has a problem: {line}"
        assert all([bool(x) for x in line]), f"Line {i} has a problem: {line}"
Code example #12
    def execute(self):
        if self.raw_s3_file is not None:
            self.main_file = self.s3_download()

        new_files = [
            n for n in self.unpack_files(self.main_file, compression="unzip")
            if ("pdf" not in n["name"].lower())
        ]

        # there should be only one voter file
        voter_file = [n for n in new_files if "vrdb" in n["name"].lower()][0]
        hist_files = [n for n in new_files if "history" in n["name"].lower()]

        if not self.ignore_checks:
            # We're already automatically limiting voter_file to one entry
            self.file_check(len([voter_file]), hist_files=len(hist_files))

        # There are two possible separators. Detect it first.
        line = voter_file["obj"].readline().decode()
        delimiter = detect(line)
        # Return to the beginning of the buffer to read the data now that we
        # know what the separator is.
        voter_file["obj"].seek(0)
        df_voter = pd.read_csv(voter_file["obj"],
                               sep=delimiter,
                               encoding="latin-1",
                               dtype=str,
                               error_bad_lines=False)

        df_hist = pd.DataFrame()
        for hist_file in hist_files:
            line = hist_file["obj"].readline().decode()
            delimiter = detect(line)
            hist_file["obj"].seek(0)
            temp = pd.read_csv(hist_file["obj"],
                               sep=delimiter,
                               encoding="latin-1",
                               dtype=str)
            df_hist = df_hist.append(temp, ignore_index=True)

        # --- handling the voter history file --- #

        # Need to fix/combine the differently named VoterHistoryID
        # and VotingHistoryID columns
        if {"VotingHistoryID", "VoterHistoryID"}.issubset(df_hist.columns):
            df_hist["VotingHistoryID"] = (df_hist.pop("VoterHistoryID").fillna(
                df_hist.pop("VotingHistoryID")))

        # can't find voter history documentation in any yaml, hardcoding column name
        election_dates = pd.to_datetime(df_hist.loc[:, "ElectionDate"],
                                        errors="coerce").dt

        elections, counts = np.unique(election_dates.date, return_counts=True)

        def convert_date(k):
            try:
                return k.strftime("%m/%d/%Y")
            except ValueError:
                return "unknown"

        sorted_elections_dict = {
            str(k): {
                "index": i,
                "count": int(counts[i]),
                "date": convert_date(k),
            }
            for i, k in enumerate(elections)
        }
        sorted_elections = list(sorted_elections_dict.keys())

        df_hist.loc[:, "all_history"] = election_dates.date.apply(str)
        df_hist.loc[:, "sparse_history"] = df_hist.loc[:, "all_history"].map(
            lambda x: int(sorted_elections_dict[x]["index"]))
        df_hist.loc[:, "county_history"] = df_hist.loc[:, self.config[
            "primary_locale_identifier"]]

        voter_groups = df_hist.groupby(self.config["voter_id"])
        all_history = voter_groups["all_history"].apply(list)
        sparse_history = voter_groups["sparse_history"].apply(list)
        county_history = voter_groups["county_history"].apply(list)
        df_hist = pd.concat([all_history, sparse_history, county_history],
                            axis=1)

        # --- handling the voter file --- #
        # some columns have become obsolete
        df_voter = df_voter.loc[:,
                                df_voter.columns.isin(self.
                                                      config["column_names"])]
        df_voter = df_voter.set_index(self.config["voter_id"])

        # pandas loads any numeric column with NaN values as floats
        # causing formatting trouble during execute() with a few columns
        # saw this solution in other states (arizona & texas)
        to_numeric = [
            df_voter.loc[:, col].str.isnumeric().all()
            for col in df_voter.columns
        ]
        df_voter.loc[:, to_numeric] = (
            df_voter.loc[:, to_numeric].fillna(-1).astype(int))

        df_voter = self.config.coerce_numeric(df_voter)
        df_voter = self.config.coerce_strings(
            df_voter,
            exclude=[
                self.config["primary_locale_identifier"],
                self.config["voter_id"],
            ],
        )
        df_voter = self.config.coerce_dates(df_voter)

        # add voter history
        df_voter = df_voter.join(df_hist)

        # Add party_identifier dummy values,
        # since WA doesn't have party info
        df_voter.loc[:, self.config["party_identifier"]] = NO_PARTY_PLACEHOLDER

        # Need to remap status codes because the original data are messy
        df_voter["StatusCodeOrig"] = df_voter["StatusCode"]
        df_voter["StatusCode"] = df_voter["StatusCodeOrig"].map(
            self.config["status_codes_remap"])
        if df_voter["StatusCode"].isnull().any():
            missing = df_voter[
                df_voter["StatusCode"].isnull()]["StatusCodeOrig"].to_list()
            logging.warning("Status codes missing from status_codes_remap")
            logging.warning(missing)

        # Check for missing columns; catch error because we're fixing them
        # below
        try:
            self.column_check(list(df_voter.columns))
        except MissingColumnsError:
            pass

        # Make sure all columns are present
        expected_cols = (self.config["ordered_columns"] +
                         self.config["ordered_generated_columns"])
        # Remove the index column to avoid duplication
        expected_cols.remove(self.config["voter_id"])

        df_voter = self.reconcile_columns(df_voter, expected_cols)
        df_voter = df_voter[expected_cols]

        self.meta = {
            "message": f"washington_{datetime.now().isoformat()}",
            "array_encoding": json.dumps(sorted_elections_dict),
            "array_decoding": json.dumps(sorted_elections),
        }

        self.processed_file = FileItem(
            name="{}.processed".format(self.config["state"]),
            io_obj=StringIO(df_voter.to_csv(encoding="utf-8")),
            s3_bucket=self.s3_bucket,
        )
Code example #13
from detect_delimiter import detect  # str2bool is a project helper defined elsewhere

def txt2dict(path):
    print('getting info from', path)
    with open(path) as f:
        lines = f.readlines()
    # filter with comprehensions: calling remove() while iterating skips
    # the element that follows each removed line
    lines = [line for line in lines if not line.startswith('#')]
    for ind, line in enumerate(lines):
        if '#' in line:
            lines[ind] = line[:line.index('#')]  # strip inline comments
        elif '\n' in line:
            lines[ind] = line.replace('\n', '')
    lines = [line for line in lines if line != '']
    delimiter = detect(lines[0])
    print(len(lines), 'lines found in txt_file with', delimiter,
          'as the delimiter')
    try:
        # the original wrapped this in a one-pass `for i in [0]` loop; it is not needed
        lines = [item.strip().rsplit(delimiter, 2) for item in lines]
        input_txt = {item[0].strip(): item[1].strip() for item in lines}
    except Exception:
        print('failed to read txt_file')
    for key, val in input_txt.items():
        if ',' in val:
            try:
                input_txt[key] = tuple(map(int, val.split(',')))
            except ValueError:
                input_txt[key] = [item.strip() for item in val.split(',')]
        else:
            try:
                input_txt[key] = float(val)
            except ValueError:
                input_txt[key] = str2bool(val)
    ### adding some default parameters if missing in info_txt
    if 'sigma' not in input_txt.keys():
        input_txt['sigma'] = 0
    if 'steps' not in input_txt.keys():
        input_txt['steps'] = ['all']
    if 'reg_subset' not in input_txt.keys():
        input_txt['reg_subset'] = [0, 0]
    if 'metric' not in input_txt.keys():
        input_txt['metric'] = 'mattes'
    if 'check_ch' not in input_txt.keys():
        input_txt['check_ch'] = input_txt['ch_names'][0]
    if 'double_register' not in input_txt.keys():
        input_txt['double_register'] = False
    #### reassign unrecognized parameters
    if not isinstance(input_txt['ch_names'], list):
        input_txt['ch_names'] = [input_txt['ch_names']]
    if not isinstance(input_txt['drift_corr'], list):
        input_txt['drift_corr'] = [input_txt['drift_corr']]
    if isinstance(input_txt['steps'], str):
        input_txt['steps'] = [input_txt['steps'].lower()]
    elif isinstance(input_txt['steps'], tuple):
        input_txt['steps'] = [s.lower() for s in input_txt['steps']]
    if 'all' in input_txt['steps']:
        input_txt['steps'] = [
            'preshift', 'postshift', 'ants', 'mask', 'n2v', 'clahe'
        ]
    # the original tested the literal string 'check_ch' against ch_names;
    # the message implies the configured channel should be validated instead
    if input_txt['check_ch'] not in input_txt['ch_names']:
        print(
            'channel defined for similarity_check not recognized, so ch_0 used'
        )
        input_txt['check_ch'] = input_txt['ch_names'][0]
    print(input_txt)
    return input_txt
Code example #14
from detect_delimiter import detect

def get_delimiter(sample_string: str):
    # double any backslash, presumably so the detected delimiter survives a
    # later backslash-unescaping step (e.g. embedding in a regex or generated source)
    return detect(sample_string).replace("\\", "\\\\")
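For the common single-character delimiters the escaping is a no-op, since a real tab character contains no backslash; a REPL sketch:

>>> get_delimiter("a\tb\tc")
'\t'
>>> get_delimiter("a,b,c")
','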
Code example #15
	def determineDelimiter(self, lineData):
		return detect_delimiter.detect(lineData)