def main():
    train_df, test_df, val_df = get_data('data/negative.csv',
                                         'data/positive.csv')
    write_lines_to_file(train_df['text'], 'train.txt')
    train_df.to_csv('data/train_df.csv', header=True, sep=';', index=False)
    test_df.to_csv('data/test_df.csv', header=True, sep=';', index=False)
    val_df.to_csv('data/val_df.csv', header=True, sep=';', index=False)
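# write_lines_to_file is used here and, as utils.write_lines_to_file, by most
# of the functions below, but it is never defined in this file. A minimal
# sketch of the assumed contract (several callers below check a boolean
# result, so it is assumed to return True on success):
def write_lines_to_file_sketch(file_name, lines):
    """Hypothetical stand-in: write an iterable of strings to file_name."""
    try:
        with open(file_name, 'w', encoding='utf-8') as out:
            for line in lines:
                # Callers pass lines both with and without trailing newlines.
                out.write(line if line.endswith('\n') else line + '\n')
        return True
    except OSError:
        return False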
def __change_config(self, identifier, old_value, new_value):
    """Change the configuration file based on the passed values."""
    line_number = self.__get_line_number(identifier)
    lines = utils.read_lines_from_file(self.menu)
    if line_number == -1:
        # Identifier not found: prepend a blank line and edit that instead.
        line_number = 0
        lines.insert(0, "\n")
    line = lines[line_number]
    if old_value is None:
        # No old value given: overwrite the whole line.
        lines[line_number] = new_value
    elif old_value == "vga=":
        # Replace (or append) the vga= token, up to the next space.
        place = line.find(old_value)
        end = line.find(" ", place)
        if place != -1:
            if end != -1:
                line = line[:place] + new_value + line[end:]
            else:
                line = line[:place] + new_value + "\n"
        else:
            line = line[:-1] + " " + new_value + "\n"
        lines[line_number] = line
    else:
        # Widen the match to include a leading space or "# " comment marker
        # so the replacement preserves the surrounding formatting.
        if old_value[0] != "#" and line.find(" " + old_value) != -1:
            old_value = " " + old_value
        if old_value[0] == "#" and line.find("# " + old_value[1:]) != -1:
            old_value = "# " + old_value[1:]
        line = line.replace(old_value, new_value)
        lines[line_number] = line
    utils.write_lines_to_file(self.menu, lines)
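# A standalone sketch of the "vga=" branch above, applied to a hypothetical
# GRUB-style kernel line (the sample values are illustrative only):
def _demo_vga_replace(line='kernel /boot/vmlinuz root=/dev/sda1 vga=771 quiet\n',
                      new_value='vga=791'):
    place = line.find('vga=')
    if place == -1:
        # No vga= token yet: append the new value before the trailing newline.
        return line[:-1] + ' ' + new_value + '\n'
    end = line.find(' ', place)
    if end != -1:
        # Replace the existing token up to the next space.
        return line[:place] + new_value + line[end:]
    return line[:place] + new_value + '\n'
# _demo_vga_replace() -> 'kernel /boot/vmlinuz root=/dev/sda1 vga=791 quiet\n'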
def generate_feature_vectors(all_figures_dir, output_data_folder):
    """Generate feature vectors for figures from text data, for a neural model.

    Args:
        all_figures_dir: (string) the folder with the textual fields for the
            figures (the files used to build an index).
        output_data_folder: (string) a folder to write the different output
            files.

    Returns:
        (list) The figure identifiers; all other outputs are written to the
        output folder.
    """
    if not os.path.exists(output_data_folder):
        os.mkdir(output_data_folder)
    text_data, figure_identifiers, image_file_names = extract_all_figures(
        all_figures_dir)
    with open(output_data_folder + '/raw_data.txt', 'w+') as raw_data_file:
        for words in text_data:
            raw_data_file.write(' '.join(words) + '\n')
    utils.write_lines_to_file(output_data_folder + '/image_files.txt',
                              image_file_names)
    # Build the vocabulary once. The original built two separate sets from the
    # token stream and relied on both iterating in the same order, which is a
    # CPython implementation detail; dict.fromkeys keeps first-seen order.
    vocab = list(dict.fromkeys(itertools.chain.from_iterable(text_data)))
    word_to_id = {token: idx for idx, token in enumerate(vocab)}
    id_to_word = np.asarray(vocab)
    x_token_ids = [[word_to_id[token] for token in x] for x in text_data]
    # Re-order the vocabulary by descending token frequency.
    count = np.zeros(id_to_word.shape)
    for x in x_token_ids:
        for token in x:
            count[token] += 1
    indices = np.argsort(-count)
    id_to_word = id_to_word[indices]
    word_to_id = {token: idx for idx, token in enumerate(id_to_word)}
    # Shift ids by one so that 0 is reserved for unknown tokens.
    x_token_ids = [[word_to_id.get(token, -1) + 1 for token in x]
                   for x in text_data]
    np.save(output_data_folder + '/words_map.npy', np.asarray(id_to_word))
    with open(output_data_folder + '/vectors.txt', 'w+') as f:
        for tokens in x_token_ids:
            for token in tokens:
                f.write(str(token) + ' ')
            f.write('\n')
    with open(output_data_folder + '/identifiers.txt', 'w+') as identifier_file:
        for identifier in figure_identifiers:
            identifier_file.write(identifier + '\n')
    return figure_identifiers
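# A minimal sketch for reading the artifacts written above back into memory.
# The file names match what generate_feature_vectors() writes; the function
# itself is an assumption, not part of the original pipeline:
def load_feature_vectors(output_data_folder):
    id_to_word = np.load(output_data_folder + '/words_map.npy')
    with open(output_data_folder + '/vectors.txt') as f:
        # Each line holds the 1-shifted token ids of one figure; 0 = unknown.
        x_token_ids = [[int(tok) for tok in line.split()] for line in f]
    with open(output_data_folder + '/identifiers.txt') as f:
        identifiers = [line.strip() for line in f]
    return id_to_word, x_token_ids, identifiers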
def main(settings):
    """Apply search/replace patterns to every file matching the filter."""
    files = recursive_glob(settings['source_dir'], settings['filter'])
    patterns = extract_patterns(get_lines_from_file(settings['patterns']),
                                settings['sep'])
    for filename in files:
        lines = get_lines_from_file(filename)
        for search, replace in patterns:
            lines = search_and_replace_lines(search, replace, lines)
        write_lines_to_file(filename, lines)
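# recursive_glob, extract_patterns, get_lines_from_file, and
# search_and_replace_lines are assumed helpers. A plausible sketch of the two
# pattern helpers, assuming each pattern line has the form "search<sep>replace":
def extract_patterns_sketch(lines, sep):
    """Hypothetical: turn pattern lines into (search, replace) pairs."""
    return [tuple(line.rstrip('\n').split(sep, 1)) for line in lines]

def search_and_replace_lines_sketch(search, replace, lines):
    """Hypothetical: plain substring replacement applied to every line."""
    return [line.replace(search, replace) for line in lines]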
def write_index_md(path, project) -> bool:
    rs = [
        '# Usage\n',
        f'Reporting for {project}\n',
        'Static site generated by Mkdocs from custom Python scripts and a '
        'Scitools Understand database\n',
        '### Reports\n',
        '* Direct Circular File References\n',
        '    1. File 1 depends on File 2 Count times\n',
        '    2. File 2 depends on File 1 Count times\n',
        '* File List Indented\n',
        '    1. Indented source file listing\n',
        '    2. Includes cumulative SLOC count\n',
        '* Red Metrics\n',
        '    1. Listing by metric type for files that contain at least one '
        'red metric\n',
        '    2. Shown for item(s) within the file\n',
        '### Searches\n',
        '* Searches do not work when site/index.html is opened directly in a '
        'web browser\n',
        '* Searches work when site/index.html is served from a web server\n',
        '    1. In a terminal window move to the ```site``` directory\n',
        '    2. Start a web server at the command prompt: '
        '```python3 -m http.server```\n',
        '    3. Open ```http://127.0.0.1:8000/index.html``` in a web browser\n',
        '### Metrics\n',
        '* Summarized and scored metrics\n',
        '* Rolled up for all directories below the current one\n',
    ]
    rs.extend(converters.metric_key_range_table())
    f_name = f'{path}/index.md'
    return utils.write_lines_to_file(f_name, rs)
def circular_file_refs(config, t: tree.UdbTree) -> bool:
    deps = get_dep_on_dict(t)
    bys = get_dep_by_dict(deps)
    circular_depends = calc_circular_dependencies(deps, bys)
    ribbons = get_ribbons_dict(t)
    ribbon_h1_left = "".join(f'|{x}' for x in sorted(t.metric_keys.values()))
    ribbon_h1_right = ribbon_h1_left[1:] + '|'
    ribbon_h2_left = '| ---:' * len(t.metric_keys) + ' '
    ribbon_h2_right = ribbon_h2_left[1:] + '|'
    header1 = (f'{ribbon_h1_left}|File 1 | Dep Cnt | | Dep Cnt |File 2 |'
               f'{ribbon_h1_right}\n')
    header2 = (f'{ribbon_h2_left}|:--- | :---: |:---:| :---: |:--- |'
               f'{ribbon_h2_right}\n')
    rs = ['# Direct Circular File References\n\n']
    rs.extend(t.show_metric_keys_table())
    rs.extend(['\n\n### Circular References\n\n', header1, header2])
    uniques = set()
    for k1, vs in circular_depends.items():
        for k2 in vs.keys():
            # Sort the pair so each circular reference is emitted only once.
            ls = sorted([k1, k2])
            composite_key = f'{ls[0]}-{ls[1]}'
            if composite_key in uniques:
                continue
            uniques.add(composite_key)
            k1_s = t.uids[k1].path.replace('Directory Structure/', '')
            name1 = k1_s.split('/')[-1]
            rel = k1_s.replace(f'/{name1}', '')
            link1 = get_link_to_file_metrics(rel, name1)
            v1 = vs[k2]
            k2_s = t.uids[k2].path.replace('Directory Structure/', '')
            name2 = k2_s.split('/')[-1]
            rel = k2_s.replace(f'/{name2}', '')
            link2 = get_link_to_file_metrics(rel, name2)
            v2 = circular_depends[k2][k1]
            ribbon_l = ribbons[k1]
            ribbon_r = ribbons[k2]
            rs.append(f'{ribbon_l}[{k1_s}]({link1})|{v1:3}| <==> |'
                      f'{v2:3}|[{k2_s}]({link2}){ribbon_r}\n')
    if len(rs) == 2:
        rs.append('No tree to output.\n')
    if len(rs) > 40:
        # Repeat the key table at the bottom of long reports.
        rs.extend(t.show_metric_keys_table())
    out_dir = f'{config["out_dir_md"]}Reports/'
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    f_name = f'{out_dir}circular_file_refs.md'
    return utils.write_lines_to_file(f_name, rs)
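# get_link_to_file_metrics is shared by the report writers in this file but
# not defined here. Judging from the hard-coded architecture links built in
# file_list_indented_sloc below, a plausible sketch (an assumption, not the
# actual implementation) is:
def get_link_to_file_metrics_sketch(rel, name):
    """Hypothetical: relative link from Reports/ to a file's metrics page."""
    return f'../Metrics/{rel}/{name}-metrics.md'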
def file_list_indented_sloc(config, t: tree.UdbTree) -> bool:
    indent = ' ' * 4
    ribbon_h1 = "".join(f'{x}|' for x in sorted(t.metric_keys.values()))
    ribbon_h2 = ' ---:|' * len(t.metric_keys) + ' '
    header1 = f'|Path {indent * 10} |{indent * 3}SLOC|{ribbon_h1}\n'
    header2 = f'|:--- |---: |{ribbon_h2}\n'
    rs = ['# File List Indented\n\n']
    rs.extend(t.show_metric_keys_table())
    rs.extend(['\n\n### File List\n\n', header1, header2])
    for arch in t.root.walk():
        if arch.name == 'Directory Structure':
            # The tree root links to the application-level metrics page.
            link = '(../Metrics/application-metrics.md)'
        else:
            path = arch.path.replace('Directory Structure/', '')
            parts = path.split('/')
            tail = parts[-1]
            link = f'(../Metrics/{path}/{tail}-metrics.md)'
        ribbon = arch.metrics_ribbon
        sloc = arch.metrics['CountStmt']['val']
        rs.append(f"|{arch.level * indent}[{arch.name:30s}]{link}"
                  f"|{sloc:5,}{ribbon}\n")
        for ent in arch.ent_children:
            spaces = (arch.level + 1) * indent
            ent_sloc = ent.metrics['CountStmt']['val']
            rel = arch.path.replace('Directory Structure/', '')
            link = get_link_to_file_metrics(rel, ent.name)
            ribbon = ent.metrics_ribbon
            rs.append(f"|{spaces}[{ent.name:30s}]({link})"
                      f"|{ent_sloc:5,}{ribbon}\n")
    if len(rs) == 2:
        rs.append('No tree to output.\n')
    if len(rs) > 40:
        # Repeat the key table at the bottom of long reports.
        rs.extend(t.show_metric_keys_table())
    out_dir = f'{config["out_dir_md"]}Reports/'
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    f_name = f'{out_dir}file_list_indented_sloc.md'
    return utils.write_lines_to_file(f_name, rs)
def run_text_reports(config: Dict, t: tree.UdbTree):
    out_dir = config['out_dir_text']
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
    if not utils.rm_directory_and_files(out_dir):
        sys.exit(f'Could not remove files and dirs in "{out_dir}", aborting.')
    # Collect the names of all enabled reports from the config.
    rs = [k for d in config['text_reports'] for k, v in d.items() if v]
    for r in rs:
        if hasattr(reports_text, r):
            xs = getattr(reports_text, r)(config, t)
            if xs is not None and len(xs) > 0:
                xs = [x + '\n' for x in xs]
                f_name = f'{out_dir}{r}.txt'
                print(f' writing: {f_name}')
                if not utils.write_lines_to_file(f_name, xs):
                    sys.exit(f'Could not write file "{f_name}", aborting.')
        else:
            print(f'Could not find report: {r}')
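# The list comprehension in run_text_reports expects config['text_reports']
# to be a list of one-entry dicts mapping a report-function name in
# reports_text to an enabled flag. An illustrative fragment (the report names
# and paths here are hypothetical):
example_config = {
    'out_dir_text': 'out/text/',
    'text_reports': [
        {'circular_file_refs': True},
        {'red_metrics': False},  # disabled entries are filtered out
    ],
}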
def red_metrics(config, t: tree.UdbTree) -> bool:
    header1 = '|Path | Value| SLOC|\n'
    header2 = '|:--- |---: |---: |\n'
    rs = ['# Red Metrics\n\n']
    rs.extend(converters.metric_key_range_table())
    xs = []
    # Collect every red-scored metric across all file entities.
    for arch in t.root.walk():
        for ent in arch.ent_children:
            for k, v in ent.metrics.items():
                val, color = v['val'], v['color']
                if color == 'red':
                    rel = arch.path.replace('Directory Structure/', '')
                    link = get_link_to_file_metrics(rel, ent.name)
                    xs.append((k, f'{rel}/{ent.name}', val,
                               ent.metrics['CountStmt']['val'], link))
    xs.sort(key=lambda tup: tup[0])
    for key, group in groupby(xs, lambda x: x[0]):
        rs.extend([f'\n## {key}\n', header1, header2])
        ts = list(group)
        if key == 'RatioCommentToCode':
            # Low comment-to-code ratios are the problem, so sort ascending.
            ts.sort(key=lambda tup: tup[2])
        else:
            ts.sort(key=lambda tup: tup[2], reverse=True)
        # The loop variable was `t`, which shadowed the UdbTree parameter.
        for row in ts:
            rs.append(f'|[{row[1]}]({row[4]})|{row[2]:,}|{row[3]:,}|\n')
    if len(rs) > 40:
        rs.extend(converters.metric_key_range_table())
    out_dir = f'{config["out_dir_md"]}Reports/'
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    f_name = f'{out_dir}red_metrics.md'
    return utils.write_lines_to_file(f_name, rs)
def write(_dir, _file, _ms) -> bool:
    if not os.path.exists(_dir):
        os.makedirs(_dir)
    return utils.write_lines_to_file(_file, _ms)
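# A small usage sketch for write(); the directory, file name, and lines are
# hypothetical:
def _demo_write() -> bool:
    return write('out/Reports', 'out/Reports/summary.md',
                 ['# Summary\n', 'All reports generated.\n'])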