def write_export_data(filetype='json', directory=None, filename=None, data=None):
    """Append export data to ``directory/filename`` and announce it on the console.

    Args:
        filetype: when equal to ``'json'`` the data is serialised with
            ``json.dumps`` (4-space indent, non-ASCII kept); any other value
            writes ``data`` as given.
        directory: directory part of the output path.
        filename: file name part of the output path; it also selects which
            progress message is echoed.
        data: the object (for ``'json'``) or text to write.

    Returns:
        A dict with ``'file_path'`` (the joined path) and ``'file_size'``
        (the value of ``path.getsize`` once the file has been closed).
    """
    file_path = path.join(directory, filename)

    # Checked in order: 'jsonraw' must come before 'json' because every
    # 'jsonraw' file name also contains 'json'.
    notices = (
        ('jsonraw',
         " We are are now writing the raw Readability Reader API export data to your hard drive."),
        ('json',
         " All your bookmarks have been processed and were converted to JSON."),
        ('html',
         " All your bookmarks have been processed and were converted to HTML."),
    )
    fallback_notice = " We are are now writing a file with all your links to your hard drive."

    with open_(file_path, 'a', encoding='utf-8') as handle:
        payload = (
            json.dumps(data, indent=4, ensure_ascii=False)
            if filetype == 'json'
            else data
        )
        notice = next(
            (text for marker, text in notices if marker in filename),
            fallback_notice,
        )
        click.echo(notice)
        handle.write(payload)

    # The with-block has closed the file, so the size below is final.
    return {'file_path': file_path, 'file_size': path.getsize(file_path)}
def load2_(infile, target_name, feature_names=None, features_maps=None, target_map=None, sep="\t"):
    """Load a delimited text file into a feature DataFrame and a target DataFrame.

    The first line of ``infile`` is treated as the header. Rows whose cell
    for a feature is empty are dropped from that feature's column.

    Args:
        infile: path passed to ``open_`` and read as UTF-8.
        target_name: header name of the target column.
        feature_names: optional list of header names to load, in that order.
            When empty or ``None`` every column except the target is loaded.
        features_maps: optional sequence of callables indexed by feature
            position and applied to each remaining cell; a ``None`` entry
            leaves the column untouched.
        target_map: accepted for interface compatibility; not used here.
        sep: field delimiter used for the header and every data row.

    Returns:
        ``(features, target)`` where ``features`` is a ``pd.DataFrame`` with
        one column per feature and ``target`` is a one-column ``pd.DataFrame``
        holding the target values as a categorical Series. A feature column
        is categorical when fewer than 90% of its values pass ``isdigit`` or
        it has at most five distinct values; otherwise it is a list of floats.

    Raises:
        ValueError: if ``target_name`` or a requested feature is not in the
            header (raised by ``list.index``).
    """
    # The context manager guarantees the handle is closed even on a bad row.
    with open_(infile, encoding="utf-8") as fp:
        head_items = fp.readline().strip("\n").split(sep)
        target_idx = head_items.index(target_name)

        if feature_names:
            features = feature_names
            idxs = [head_items.index(name) for name in feature_names]
        else:
            # Built as a real list: range() has no .remove() on Python 3.
            idxs = [i for i in range(len(head_items)) if i != target_idx]
            features = [head_items[i] for i in idxs]

        data = []
        targets = []
        for line in fp:
            items = line.strip("\n").split(sep)
            data.append([items[i] for i in idxs])
            targets.append(items[target_idx])

    outdict = {}
    for i, feature in enumerate(features):
        column = [row[i] for row in data]

        # Drop rows whose cell for this feature is empty.
        keep_idx = [j for j, value in enumerate(column) if value]
        column = [column[j] for j in keep_idx]
        # NOTE(review): targets are narrowed again for every feature, so with
        # several features that have empty cells in different rows these
        # indices stop lining up - confirm multi-feature loads are intended.
        targets = [targets[j] for j in keep_idx]

        # A None mapper means "leave as is" (what map(None, ...) did on
        # Python 2; on Python 3 it would raise TypeError).
        mapper = features_maps[i] if features_maps else None
        if mapper is not None:
            column = [mapper(value) for value in column]

        numeric_flags = [isdigit(value) for value in column]
        # An all-empty column is kept categorical instead of dividing by zero.
        # NOTE(review): on Python 2 without `from __future__ import division`
        # the ratio below is integer division - confirm which is intended.
        if (not column
                or sum(numeric_flags) / len(column) < 0.9
                or len(set(column)) <= 5):
            outdict[feature] = pd.Series(column, dtype="category")
        else:
            outdict[feature] = [float(value) for value in column]

    return pd.DataFrame(outdict), pd.DataFrame(
        {target_name: pd.Series(targets, dtype="category")})
def desc_(data_file, feature_names, target_name, target_map=keep, features_maps=[None], Osep="\t", Isep="\t"):
    """Build a descriptive report for a data file and write it to ``00.desc.txt``.

    The target column is loaded first and summarised with ``target_table``.
    Each feature is then loaded on its own, plotted (``Boxplot`` when
    ``is_continuous`` says so, ``Pie`` otherwise) and summarised with
    ``features_table``. All tables are merged and rendered by ``show_table``.

    Args:
        data_file: path of the input file, forwarded to ``load2_``.
        feature_names: header names of the features to describe, in order.
        target_name: header name of the target column.
        target_map: callable applied to the target values when the target
            column is loaded.
        features_maps: sequence of per-feature callables; entry ``i`` is
            forwarded for feature ``i``. The default list is never mutated.
        Osep: separator handed to ``show_table`` for the rendered report.
        Isep: separator forwarded to ``load2_`` as ``sep``.

    Returns:
        None. The report is written to ``00.desc.txt`` in the current
        working directory.
    """
    show_dict = {}
    show_items = []

    # The target is loaded as if it were a feature so that its values go
    # through target_map; only the second frame returned is used.
    _, targets = load2_(data_file,
                        target_name=target_name,
                        feature_names=[target_name],
                        target_map=[target_map],
                        features_maps=[target_map],
                        sep=Isep)
    target_name = targets.keys()[0]
    vals = targets[target_name].cat.categories
    p0 = {"Items": {"-": vals}}
    p1 = target_table(targets)
    show_dict = merge_dicts(show_dict, p0, p1)
    show_items.append("Items")
    show_items.append(target_name)

    for i, feature_name in enumerate(feature_names):
        # Slicing yields a one-element list (or an empty one past the end),
        # matching the single feature requested from load2_.
        data, _ = load2_(data_file,
                         target_name=target_name,
                         feature_names=[feature_name],
                         target_map=[target_map],
                         features_maps=features_maps[i:i + 1],
                         sep=Isep)
        if is_continuous(data[feature_name]):
            Boxplot(data, targets, i + 1)
        else:
            Pie(data, targets, i + 1)
        p = features_table(data, targets)
        show_dict = merge_dicts(show_dict, p)
        show_items.append(feature_name)

    # Render before opening the file so a rendering failure does not
    # truncate an existing report; the with-block closes the handle even
    # if the write fails.
    text = show_table(show_dict, show_items, Osep)
    with open_("00.desc.txt", "w", encoding="utf-8") as fh:
        fh.write(text + "\n")
def _test_export_to_txt_frame_style(self, frame_style, chars, positive=True):
    """Export ``utils.table`` as text with ``frame_style`` and check ``chars``.

    Args:
        frame_style: value forwarded to ``rows.export_to_txt``.
        chars: iterable of strings to look for in the exported text.
        positive: when true every item of ``chars`` must be present in the
            output; when false every item must be absent.
    """
    temp = tempfile.NamedTemporaryFile(delete=False)
    # Registered for cleanup because the file is created with delete=False.
    self.files_to_delete.append(temp.name)
    rows.export_to_txt(
        utils.table, temp.file, encoding="utf-8", frame_style=frame_style
    )

    # io.open accepts `encoding` on Python 2 and is the builtin open on
    # Python 3, so no version switch is needed.
    from io import open as open_

    # Read through a context manager so the handle is closed immediately.
    with open_(temp.name, "rt", encoding="utf-8") as exported:
        file_data = exported.read()

    check = self.assertIn if positive else self.assertNotIn
    for char in chars:
        check(char, file_data)
def _test_export_to_txt_frame_style(self, frame_style, chars, positive=True):
    """Write ``utils.table`` as text using ``frame_style`` and verify ``chars``.

    Args:
        frame_style: value forwarded to ``rows.export_to_txt``.
        chars: iterable of strings checked against the exported text.
        positive: if true each string must appear in the output, otherwise
            each string must not appear.
    """
    temp = tempfile.NamedTemporaryFile(delete=False)
    # delete=False leaves the file on disk, so it is queued for removal.
    self.files_to_delete.append(temp.name)
    rows.export_to_txt(utils.table, temp.file, encoding='utf-8',
                       frame_style=frame_style)

    # io.open is the builtin open on Python 3 and supports `encoding` on
    # Python 2, which makes the version check unnecessary.
    from io import open as open_

    # The with-block closes the read handle instead of leaking it.
    with open_(temp.name, 'rt', encoding='utf-8') as exported:
        file_data = exported.read()

    for char in chars:
        if positive:
            self.assertIn(char, file_data)
        else:
            self.assertNotIn(char, file_data)