コード例 #1
0
def main():
    """Load the train/test/validation splits and persist them to disk.

    The three objects returned by ``get_data`` for the negative and positive
    CSV files are unpacked as train, test and validation sets (presumably
    pandas DataFrames, since ``to_csv`` is called on them -- confirm against
    ``get_data``).  The ``text`` column of the training set is written to
    ``train.txt``; every split is then saved under ``data/`` as a
    ``;``-separated CSV with a header row and without the index column.
    """
    train_df, test_df, val_df = get_data('data/negative.csv',
                                         'data/positive.csv')
    write_lines_to_file(train_df['text'], 'train.txt')

    # Same CSV dialect for every split; keep the original write order.
    csv_options = {'header': True, 'sep': ';', 'index': False}
    destinations = (
        (train_df, 'data/train_df.csv'),
        (test_df, 'data/test_df.csv'),
        (val_df, 'data/val_df.csv'),
    )
    for frame, csv_path in destinations:
        frame.to_csv(csv_path, **csv_options)
コード例 #2
0
    def __change_config(self, identifier, old_value, new_value):
        """Change one line of the configuration file and write the file back.

        The line to edit is the one ``__get_line_number`` reports for
        ``identifier``.  When it reports -1, an empty line is inserted at the
        top of the file and that line is edited instead.

        Args:
            identifier: Value handed to ``__get_line_number`` to locate the
                line.
            old_value: Selects the edit.  ``None`` replaces the whole line
                with ``new_value``.  ``"vga="`` replaces the existing
                ``vga=...`` token (up to the next space or the end of the
                line) or, when the line has no such token, appends
                ``new_value`` to the line.  Any other string is replaced by
                ``new_value`` wherever it occurs in the line.
            new_value: Replacement text.

        Raises:
            IndexError: If ``old_value`` is an empty string.
        """
        line_number = self.__get_line_number(identifier)
        lines = utils.read_lines_from_file(self.menu)
        if line_number == -1:
            line_number = 0
            lines.insert(0, "\n")
        line = lines[line_number]

        if old_value is None:
            line = new_value
        elif old_value == "vga=":
            place = line.find(old_value)
            if place == -1:
                # No vga option on the line yet: append one.  Only a real
                # trailing newline is dropped, so a final line without one
                # does not lose its last character.
                body = line[:-1] if line.endswith("\n") else line
                line = body + " " + new_value + "\n"
            else:
                end = line.find(" ", place)
                if end == -1:
                    # The token runs to the end of the line.
                    line = line[:place] + new_value + "\n"
                else:
                    line = line[:place] + new_value + line[end:]
        else:
            # Prefer the variant with a separating space when the line has
            # it, so the replacement consumes that space as well.
            if old_value[0] != "#" and line.find(" " + old_value) != -1:
                old_value = " " + old_value
            elif old_value[0] == "#" and line.find("# " + old_value[1:]) != -1:
                old_value = "# " + old_value[1:]
            line = line.replace(old_value, new_value)

        lines[line_number] = line
        utils.write_lines_to_file(self.menu, lines)
コード例 #3
0
    def __change_config(self, identifier, old_value, new_value):
        """Change one line of the configuration file and write the file back.

        The line to edit is the one ``__get_line_number`` reports for
        ``identifier``.  When it reports -1, an empty line is inserted at the
        top of the file and that line is edited instead.

        Args:
            identifier: Value handed to ``__get_line_number`` to locate the
                line.
            old_value: Selects the edit.  ``None`` replaces the whole line
                with ``new_value``.  ``"vga="`` replaces the existing
                ``vga=...`` token (up to the next space or the end of the
                line) or, when the line has no such token, appends
                ``new_value`` to the line.  Any other string is replaced by
                ``new_value`` wherever it occurs in the line.
            new_value: Replacement text.

        Raises:
            IndexError: If ``old_value`` is an empty string.
        """
        line_number = self.__get_line_number(identifier)
        lines = utils.read_lines_from_file(self.menu)
        if line_number == -1:
            line_number = 0
            lines.insert(0, "\n")
        line = lines[line_number]

        if old_value is None:
            line = new_value
        elif old_value == "vga=":
            place = line.find(old_value)
            if place == -1:
                # No vga option on the line yet: append one.  Only a real
                # trailing newline is dropped, so a final line without one
                # does not lose its last character.
                body = line[:-1] if line.endswith("\n") else line
                line = body + " " + new_value + "\n"
            else:
                end = line.find(" ", place)
                if end == -1:
                    # The token runs to the end of the line.
                    line = line[:place] + new_value + "\n"
                else:
                    line = line[:place] + new_value + line[end:]
        else:
            # Prefer the variant with a separating space when the line has
            # it, so the replacement consumes that space as well.
            if old_value[0] != "#" and line.find(" " + old_value) != -1:
                old_value = " " + old_value
            elif old_value[0] == "#" and line.find("# " + old_value[1:]) != -1:
                old_value = "# " + old_value[1:]
            line = line.replace(old_value, new_value)

        lines[line_number] = line
        utils.write_lines_to_file(self.menu, lines)
コード例 #4
0
def generate_feature_vectors(all_figures_dir, output_data_folder):
    """Generate feature vectors for figures from text data to be used in a neural model.

    Every distinct token receives an integer id ordered by descending corpus
    frequency; ties are broken alphabetically so that repeated runs produce
    identical files.  Ids start at 1, id 0 is never assigned.

    Files written to ``output_data_folder`` (created if missing):
      raw_data.txt: one figure per line, tokens separated by single spaces.
      image_files.txt: the image file names, via ``utils.write_lines_to_file``.
      words_map.npy: array of tokens; the token at index ``i`` has id ``i + 1``.
      vectors.txt: one figure per line, every token id followed by a space.
      identifiers.txt: one figure identifier per line.

    Args:
      all_figures_dir: (string) the folder with the textual fields for the figures (the files used to build an index).
      output_data_folder: (string) a folder to write the different output files.

    Returns:
      (list). The figure identifiers as returned by ``extract_all_figures``.
    """
    from collections import Counter

    os.makedirs(output_data_folder, exist_ok=True)

    text_data, figure_identifiers, image_file_names = extract_all_figures(
        all_figures_dir)

    with open(os.path.join(output_data_folder, 'raw_data.txt'),
              'w') as raw_data_file:
        raw_data_file.writelines(' '.join(words) + '\n' for words in text_data)

    utils.write_lines_to_file(
        os.path.join(output_data_folder, 'image_files.txt'), image_file_names)

    # Build the vocabulary in a single pass.  Sorting on (-count, token)
    # gives most-frequent-first with a deterministic order among ties,
    # independent of set/hash iteration order.
    token_counts = Counter(itertools.chain.from_iterable(text_data))
    id_to_word = sorted(token_counts,
                        key=lambda token: (-token_counts[token], token))
    word_to_id = {token: idx for idx, token in enumerate(id_to_word, start=1)}

    x_token_ids = [[word_to_id[token] for token in x] for x in text_data]

    np.save(os.path.join(output_data_folder, 'words_map.npy'),
            np.asarray(id_to_word))

    with open(os.path.join(output_data_folder, 'vectors.txt'), 'w') as f:
        for tokens in x_token_ids:
            # Keep the established format: every id is followed by a space.
            f.write(''.join(str(token) + ' ' for token in tokens) + '\n')

    with open(os.path.join(output_data_folder, 'identifiers.txt'),
              'w') as identifier_file:
        identifier_file.writelines(
            identifier + '\n' for identifier in figure_identifiers)

    return figure_identifiers
コード例 #5
0
ファイル: replace_in_file.py プロジェクト: jpcw/sandbox
def main(settings):
    """Apply every search/replace pattern to every selected file, in place.

    Args:
        settings: Mapping with the keys ``source_dir`` and ``filter`` (both
            passed to ``recursive_glob`` to select the files), ``patterns``
            (path of the file whose lines describe the patterns) and ``sep``
            (passed to ``extract_patterns`` together with those lines).
    """
    targets = recursive_glob(settings['source_dir'], settings['filter'])
    pattern_lines = get_lines_from_file(settings['patterns'])
    replacements = extract_patterns(pattern_lines, settings['sep'])

    for path in targets:
        content = get_lines_from_file(path)
        # Patterns are applied one after another, each on the previous result.
        for needle, substitute in replacements:
            content = search_and_replace_lines(needle, substitute, content)
        write_lines_to_file(path, content)
コード例 #6
0
def write_index_md(path, project) -> bool:
    """Write the ``index.md`` landing page of the generated report site.

    The page consists of a fixed usage description followed by the table
    returned by ``converters.metric_key_range_table()``.

    Args:
        path: Directory that receives ``index.md``.
        project: Project name shown in the introduction line.

    Returns:
        The result of ``utils.write_lines_to_file`` for ``{path}/index.md``.
    """
    rs = [
        '# Usage\n',
        f'Reporting for {project}\n',
        # A missing comma used to fuse this entry with the next one through
        # implicit string concatenation.
        'Static site generated by Mkdocs from custom Python scripts and a Scitools Understand database\n',
        '### Reports\n',
        '* Direct Circular File References\n',
        '    1. File 1 depends on File 2 Count times\n',
        '    2. File 2 depends on File 1 Count times\n',
        '* File List Indented\n',
        '    1. Indented source file listing\n',
        '    2. Includes cumulative SLOC count\n',
        '* Red Metrics\n',
        '    1. Listing by metric type for files that contain at least one red metric\n',
        '    2. for item(s) within the file\n',
        '### Searches\n',
        '* Searches do not work when opening site/index.html file directly from web browser\n',
        '* Searches worked when opening site/index.html file from web server\n',
        '    1. In terminal window move to ```site``` directory\n',
        '    2. Start web server at command prompt: ```python3 -m http.server```\n',
        '    3. Open ```http://127.0.0.1:8000/index.html``` in web browser\n',
        '### Metrics\n',
        '* Summarized and scored metrics\n',
        '* Rolled up for all directories below current\n',
    ]
    rs.extend(converters.metric_key_range_table())
    f_name = f'{path}/index.md'
    return utils.write_lines_to_file(f_name, rs)
コード例 #7
0
def _split_report_path(tree_path):
    """Return ``(display_path, parent_dir, file_name)`` for a tree path."""
    display_path = tree_path.replace('Directory Structure/', '')
    # Split on the LAST separator only.  Removing f'/{name}' with
    # str.replace() also mangled directories that merely start with the
    # file name (e.g. 'src/ab/a' became 'srcb').
    parent_dir, sep, file_name = display_path.rpartition('/')
    if not sep:
        # No directory part: as before, the bare name doubles as directory.
        parent_dir = display_path
    return display_path, parent_dir, file_name


def circular_file_refs(config, t: tree.UdbTree) -> bool:
    """Write the "Direct Circular File References" markdown report.

    Each unordered pair of mutually dependent files is listed once, with the
    dependency count in both directions and each file's metrics ribbon.

    Args:
        config: Mapping; ``config["out_dir_md"]`` is the markdown output root
            (the report goes to ``<out_dir_md>Reports/circular_file_refs.md``).
        t: Tree providing ``metric_keys``, ``uids`` and
            ``show_metric_keys_table()``.

    Returns:
        The result of ``utils.write_lines_to_file`` for the report file.
    """
    deps = get_dep_on_dict(t)
    bys = get_dep_by_dict(deps)
    circular_depends = calc_circular_dependencies(deps, bys)
    ribbons = get_ribbons_dict(t)

    ribbon_h1_left = "".join(f'|{x}' for x in sorted(t.metric_keys.values()))
    ribbon_h1_right = ribbon_h1_left[1:] + '|'
    ribbon_h2_left = '| ---:' * len(t.metric_keys) + ' '
    ribbon_h2_right = ribbon_h2_left[1:] + '|'

    header1 = f'{ribbon_h1_left}|File 1  |  Dep Cnt |     | Dep Cnt |File 2 |{ribbon_h1_right}\n'
    header2 = f'{ribbon_h2_left}|:---    |   :---:  |:---:|  :---:  |:---   |{ribbon_h2_right}\n'

    rs = ['# Direct Circular File References\n\n']
    rs.extend(t.show_metric_keys_table())
    rs.extend(['\n\n### Circular References\n\n', header1, header2])
    header_len = len(rs)

    seen_pairs = set()
    for k1, vs in circular_depends.items():
        for k2, v1 in vs.items():
            # A <-> B and B <-> A are the same finding; report it once.
            pair = frozenset((k1, k2))
            if pair in seen_pairs:
                continue
            seen_pairs.add(pair)

            k1_s, rel1, name1 = _split_report_path(t.uids[k1].path)
            link1 = get_link_to_file_metrics(rel1, name1)

            k2_s, rel2, name2 = _split_report_path(t.uids[k2].path)
            link2 = get_link_to_file_metrics(rel2, name2)
            v2 = circular_depends[k2][k1]

            ribbon_l = ribbons[k1]
            ribbon_r = ribbons[k2]
            rs.append(f'{ribbon_l}[{k1_s}]({link1})|{v1:3}| <==> |{v2:3}|[{k2_s}]({link2}){ribbon_r}\n')

    # The former test `len(rs) == 2` could never be true because the title,
    # key table and headers are already in the list; compare against the
    # header length so an empty result is actually reported.
    if len(rs) == header_len:
        rs.append('No tree to output.\n')

    # Repeat the key table below long reports.
    if len(rs) > 40:
        rs.extend(t.show_metric_keys_table())

    out_dir = f'{config["out_dir_md"]}Reports/'
    os.makedirs(out_dir, exist_ok=True)
    f_name = f'{out_dir}circular_file_refs.md'
    return utils.write_lines_to_file(f_name, rs)
コード例 #8
0
def file_list_indented_sloc(config, t: tree.UdbTree) -> bool:
    """Write the "File List Indented" markdown report.

    Every node yielded by ``t.root.walk()`` and each of its ``ent_children``
    becomes one table row, indented by its level, showing its ``CountStmt``
    value, its metrics ribbon and a link to its metrics page.

    Args:
        config: Mapping; ``config["out_dir_md"]`` is the markdown output root
            (the report goes to
            ``<out_dir_md>Reports/file_list_indented_sloc.md``).
        t: Tree providing ``metric_keys``, ``root`` and
            ``show_metric_keys_table()``.

    Returns:
        The result of ``utils.write_lines_to_file`` for the report file.
    """
    indent = '&nbsp;' * 4

    ribbon_h1 = "".join(f'{x}|' for x in sorted(t.metric_keys.values()))
    ribbon_h2 = ' ---:|' * len(t.metric_keys) + ' '
    header1 = f'|Path {indent * 10} |{indent * 3}SLOC|{ribbon_h1}\n'
    header2 = f'|:---             |---:   |{ribbon_h2}\n'

    rs = ['# File List Indented\n\n']
    rs.extend(t.show_metric_keys_table())
    rs.extend(['\n\n### File List\n\n', header1, header2])
    header_len = len(rs)

    for arch in t.root.walk():
        # Shared by the directory link and by every child link below.
        rel = arch.path.replace('Directory Structure/', '')
        if arch.name == 'Directory Structure':
            link = '(../Metrics/application-metrics.md)'
        else:
            tail = rel.split('/')[-1]
            link = f'(../Metrics/{rel}/{tail}-metrics.md)'
        ribbon = arch.metrics_ribbon
        sloc = arch.metrics['CountStmt']['val']
        rs.append(f"|{arch.level * indent}[{arch.name:30s}]{link}|{sloc:5,}{ribbon}\n")

        spaces = (arch.level + 1) * indent
        for ent in arch.ent_children:
            ent_sloc = ent.metrics['CountStmt']['val']
            ent_link = get_link_to_file_metrics(rel, ent.name)
            ent_ribbon = ent.metrics_ribbon
            rs.append(f"|{spaces}[{ent.name:30s}]({ent_link})|{ent_sloc:5,}{ent_ribbon}\n")

    # The former test `len(rs) == 2` could never be true because the title,
    # key table and headers are already in the list; compare against the
    # header length so an empty tree is actually reported.
    if len(rs) == header_len:
        rs.append('No tree to output.\n')

    # Repeat the key table below long reports.
    if len(rs) > 40:
        rs.extend(t.show_metric_keys_table())

    out_dir = f'{config["out_dir_md"]}Reports/'
    os.makedirs(out_dir, exist_ok=True)
    f_name = f'{out_dir}file_list_indented_sloc.md'
    return utils.write_lines_to_file(f_name, rs)
コード例 #9
0
def run_text_reports(config: Dict, t: tree.UdbTree):
    """Run every enabled text report and write each result to a ``.txt`` file.

    ``config['text_reports']`` is a list of mappings from report name to a
    flag; each name with a truthy flag is looked up as a function on
    ``reports_text`` and called with ``(config, t)``.  A non-empty result is
    written, one entry per line, to ``<out_dir_text><name>.txt``.  Names
    without a matching function are reported on stdout and skipped.

    Args:
        config: Settings; reads ``out_dir_text`` and ``text_reports``.
        t: Tree handed through to every report function.

    Raises:
        SystemExit: If the output directory cannot be cleaned or a report
            file cannot be written.
    """
    out_dir = config['out_dir_text']
    os.makedirs(out_dir, exist_ok=True)

    if not utils.rm_directory_and_files(out_dir):
        sys.exit(f'Could not remove files and dirs in "{out_dir}", aborting.')

    enabled = [name
               for entry in config['text_reports']
               for name, active in entry.items() if active]
    for name in enabled:
        if not hasattr(reports_text, name):
            print(f'Could not find report: {name}')
            continue

        xs = getattr(reports_text, name)(config, t)
        if not xs:
            # Nothing to write for this report.
            continue

        f_name = f'{out_dir}{name}.txt'
        print(f'    writing: {f_name}')
        if not utils.write_lines_to_file(f_name, (x + '\n' for x in xs)):
            sys.exit(f'Could not write file "{f_name}", aborting.')
コード例 #10
0
def red_metrics(config, t: tree.UdbTree) -> bool:
    """Write the "Red Metrics" markdown report.

    Collects every metric whose ``color`` is ``'red'`` from the
    ``ent_children`` of all nodes yielded by ``t.root.walk()`` and emits one
    table per metric key.  Rows are ordered by value, worst first:
    ascending for ``RatioCommentToCode``, descending for every other key.

    Args:
        config: Mapping; ``config["out_dir_md"]`` is the markdown output root
            (the report goes to ``<out_dir_md>Reports/red_metrics.md``).
        t: Tree providing ``root``.

    Returns:
        The result of ``utils.write_lines_to_file`` for the report file.
    """
    header1 = '|Path    |  Value|    SLOC|\n'
    header2 = '|:---             |---:   |---:    |\n'
    rs = ['# Red Metrics\n\n']
    rs.extend(converters.metric_key_range_table())

    # (metric key, display path, value, SLOC, link) for every red metric.
    xs = []
    for arch in t.root.walk():
        for ent in arch.ent_children:
            for k, v in ent.metrics.items():
                if v['color'] != 'red':
                    continue
                rel = arch.path.replace('Directory Structure/', '')
                link = get_link_to_file_metrics(rel, ent.name)
                xs.append((k, f'{rel}/{ent.name}', v['val'],
                           ent.metrics['CountStmt']['val'], link))

    # groupby() only merges adjacent items, so sort by the same key first.
    xs.sort(key=lambda tup: tup[0])
    for key, group in groupby(xs, lambda x: x[0]):
        rs.extend([f'\n## {key}\n', header1, header2])
        # For the comment ratio a LOW value is the bad one.
        worst_first = sorted(group, key=lambda tup: tup[2],
                             reverse=(key != 'RatioCommentToCode'))
        # Unpack into fresh names; the loop variable used to be called `t`
        # and overwrote the tree parameter.
        for _, display_path, value, sloc, link in worst_first:
            rs.append(f'|[{display_path}]({link})|{value:,}|{sloc:,}|\n')

    # Repeat the key table below long reports.
    if len(rs) > 40:
        rs.extend(converters.metric_key_range_table())

    out_dir = f'{config["out_dir_md"]}Reports/'
    os.makedirs(out_dir, exist_ok=True)
    f_name = f'{out_dir}red_metrics.md'
    return utils.write_lines_to_file(f_name, rs)
コード例 #11
0
 def write(_dir, _file, _ms) -> bool:
     """Create ``_dir`` if it does not exist, then write ``_ms`` to ``_file``.

     Returns the result of ``utils.write_lines_to_file(_file, _ms)``.
     """
     dir_missing = not os.path.exists(_dir)
     if dir_missing:
         # Intermediate directories are created as well.
         os.makedirs(_dir)
     written = utils.write_lines_to_file(_file, _ms)
     return written