Example #1
0
def write_export_data(filetype='json',
                      directory=None,
                      filename=None,
                      data=None):
    """Append export data to ``directory/filename`` and report the result.

    The file is opened in append mode with UTF-8 encoding.  When
    ``filetype`` is ``'json'`` the data is serialised with ``json.dumps``
    (4-space indent, non-ASCII characters kept as-is); any other
    ``filetype`` writes ``data`` unchanged.  A status message, selected by
    which marker substring occurs in ``filename``, is echoed through
    ``click`` right before the write.

    Returns:
        dict: ``'file_path'`` (the joined path) and ``'file_size'`` (the
        size reported by ``path.getsize`` once the file is closed).
    """
    file_path = path.join(directory, filename)

    # Checked in order; 'jsonraw' has to come before 'json' because every
    # name containing the former also contains the latter.
    notices = (
        ('jsonraw',
         " We are are now writing the raw Readability Reader API export data to your hard drive."),
        ('json',
         " All your bookmarks have been processed and were converted to JSON."),
        ('html',
         " All your bookmarks have been processed and were converted to HTML."),
    )
    fallback_notice = " We are are now writing a file with all your links to your hard drive."

    with open_(file_path, 'a', encoding='utf-8') as export_file:
        if filetype != 'json':
            export_data = data
        else:
            export_data = json.dumps(data, indent=4, ensure_ascii=False)

        # First marker found in the file name decides the message.
        notice = next(
            (text for marker, text in notices if marker in filename),
            fallback_notice)
        click.echo(notice)

        export_file.write(export_data)
    # the with-block has closed the file, so the size below is final

    return {'file_path': file_path, 'file_size': path.getsize(file_path)}
Example #2
0
def load2_(infile,
           target_name,
           feature_names=None,
           features_maps=None,
           target_map=None,
           sep="\t"):
    """Load a delimited text file into a feature frame and a target frame.

    The first line of ``infile`` is treated as the header.  Every following
    line is split on ``sep``; rows holding an empty value in any loaded
    feature column are dropped, so the features and targets stay aligned.

    A column is stored as a pandas categorical when fewer than 90% of its
    values satisfy ``isdigit`` or when it has at most five distinct values;
    otherwise its values are converted with ``float``.

    Args:
        infile: Path of the UTF-8 text file to read.
        target_name: Header name of the target column.
        feature_names: Header names of the columns to load as features.
            When empty or omitted, every column except the target is loaded.
        features_maps: Optional sequence of callables, indexed in parallel
            with the loaded features and applied to each value of the
            matching column.  A ``None`` entry leaves the values untouched.
        target_map: Accepted for interface compatibility; not used here.
        sep: Field delimiter of the header and data lines.

    Returns:
        tuple: ``(features, targets)`` where ``features`` is a
        ``pd.DataFrame`` with one column per loaded feature and ``targets``
        is a ``pd.DataFrame`` with the single categorical column
        ``target_name``.

    Raises:
        ValueError: If ``target_name`` or a requested feature name is not
            present in the header.
        IndexError: If a data line has fewer fields than a selected column
            index requires.
    """
    # The context manager guarantees the handle is closed even on errors.
    with open_(infile, encoding="utf-8") as fp:
        head_items = fp.readline().strip("\n").split(sep)
        target_idx = head_items.index(target_name)

        if feature_names:
            features = feature_names
            idxs = [head_items.index(feature) for feature in feature_names]
        else:
            # Built as a list so this also works where range() is lazy.
            idxs = [i for i in range(len(head_items)) if i != target_idx]
            features = [head_items[i] for i in idxs]

        rows = []
        targets = []
        for line in fp:
            items = line.strip("\n").split(sep)
            rows.append([items[i] for i in idxs])
            targets.append(items[target_idx])

    # Filter once, row-wise: a row is kept only when every loaded feature
    # value is non-empty.  Filtering the targets separately per column
    # would apply indices of the unfiltered data to an already shortened
    # target list as soon as more than one feature is loaded.
    kept = [k for k, row in enumerate(rows) if all(row)]
    rows = [rows[k] for k in kept]
    targets = [targets[k] for k in kept]

    outdict = {}
    for i, feature in enumerate(features):
        col_data = [row[i] for row in rows]

        mapper = features_maps[i] if features_maps else None
        if mapper is not None:
            col_data = [mapper(value) for value in col_data]

        numeric_hits = sum(isdigit(value) for value in col_data)
        # An empty column is categorical by definition; testing it first
        # avoids dividing by zero.
        # NOTE(review): on Python 2 the ratio below is floor division unless
        # the module enables `from __future__ import division` - confirm
        # which behaviour is intended.
        if (not col_data
                or numeric_hits / len(col_data) < 0.9
                or len(set(col_data)) <= 5):
            outdict[feature] = pd.Series(col_data, dtype="category")
        else:
            outdict[feature] = [float(value) for value in col_data]

    return pd.DataFrame(outdict), pd.DataFrame(
        {target_name: pd.Series(targets, dtype="category")})
Example #3
0
def desc_(data_file,
          feature_names,
          target_name,
          target_map=keep,
          features_maps=[None],
          Osep="\t",
          Isep="\t"):
    """Write a descriptive table of the target and features to ``00.desc.txt``.

    The target column is loaded first through ``load2_`` and summarised with
    ``target_table``.  Each feature is then loaded on its own, passed to
    ``Boxplot`` when ``is_continuous`` reports it as continuous and to
    ``Pie`` otherwise, and summarised with ``features_table``.  The merged
    result is rendered by ``show_table`` and written, followed by a newline,
    to ``00.desc.txt`` in the current working directory.

    Args:
        data_file: Path handed to ``load2_``.
        feature_names: Names of the feature columns to describe, in the
            order they should appear in the table.
        target_name: Name of the target column.
        target_map: Callable forwarded to ``load2_`` for the target column.
        features_maps: Sequence of per-feature callables; feature ``i``
            receives the slice ``features_maps[i:i + 1]``, which is empty
            when no entry exists for it.  The list default is only sliced,
            never mutated, so sharing it between calls is harmless.
        Osep: Separator passed to ``show_table`` for the output text.
        Isep: Separator passed to ``load2_`` for the input file.
    """
    show_dict = {}
    show_items = []

    # The target column is loaded as if it were a feature so that it gets
    # the same mapping and filtering as any other column.
    _, targets = load2_(data_file,
                        target_name=target_name,
                        feature_names=[target_name],
                        target_map=[target_map],
                        features_maps=[target_map],
                        sep=Isep)
    target_name = targets.keys()[0]
    vals = targets[target_name].cat.categories
    p0 = {"Items": {"-": vals}}
    p1 = target_table(targets)
    show_dict = merge_dicts(show_dict, p0, p1)
    show_items.append("Items")
    show_items.append(target_name)

    for i, feature_name in enumerate(feature_names):
        data, _ = load2_(data_file,
                         target_name=target_name,
                         feature_names=[feature_name],
                         target_map=[target_map],
                         features_maps=features_maps[i:i + 1],
                         sep=Isep)
        if is_continuous(data[feature_name]):
            Boxplot(data, targets, i + 1)
        else:
            Pie(data, targets, i + 1)
        p = features_table(data, targets)
        show_dict = merge_dicts(show_dict, p)
        show_items.append(feature_name)

    # Render before opening the file so a failure in show_table does not
    # truncate an existing report; the context manager closes the handle
    # even when the write raises.
    text = show_table(show_dict, show_items, Osep)
    with open_("00.desc.txt", "w", encoding="utf-8") as fh:
        fh.write(text + "\n")
Example #4
0
    def _test_export_to_txt_frame_style(self, frame_style, chars, positive=True):
        """Export ``utils.table`` as text and check which characters it contains.

        Args:
            frame_style: Forwarded to ``rows.export_to_txt`` as ``frame_style``.
            chars: Iterable of strings looked up in the exported text.
            positive: When true, every entry of ``chars`` must occur in the
                exported text; when false, none of them may occur.
        """
        temp = tempfile.NamedTemporaryFile(delete=False)
        # The file outlives this method (delete=False), so its path is
        # recorded in self.files_to_delete - presumably removed by the test
        # case's cleanup, which is not visible here.
        self.files_to_delete.append(temp.name)
        rows.export_to_txt(
            utils.table, temp.file, encoding="utf-8", frame_style=frame_style
        )

        # io.open accepts ``encoding`` on Python 2 and is the builtin open on
        # Python 3, so a single import covers both without a version check.
        from io import open as open_

        # Read through a context manager so the handle is closed right away
        # instead of being left to the garbage collector.
        with open_(temp.name, "rt", encoding="utf-8") as exported:
            file_data = exported.read()

        check = self.assertIn if positive else self.assertNotIn
        for char in chars:
            check(char, file_data)
    def _test_export_to_txt_frame_style(self,
                                        frame_style,
                                        chars,
                                        positive=True):
        """Export ``utils.table`` as text and check which characters it contains.

        Args:
            frame_style: Forwarded to ``rows.export_to_txt`` as ``frame_style``.
            chars: Iterable of strings looked up in the exported text.
            positive: When true, every entry of ``chars`` must occur in the
                exported text; when false, none of them may occur.
        """
        temp = tempfile.NamedTemporaryFile(delete=False)
        # The file outlives this method (delete=False), so its path is
        # recorded in self.files_to_delete - presumably removed by the test
        # case's cleanup, which is not visible here.
        self.files_to_delete.append(temp.name)
        rows.export_to_txt(utils.table,
                           temp.file,
                           encoding='utf-8',
                           frame_style=frame_style)

        # io.open accepts ``encoding`` on Python 2 and is the builtin open on
        # Python 3, so a single import covers both without a version check.
        from io import open as open_

        # Read through a context manager so the handle is closed right away
        # instead of being left to the garbage collector.
        with open_(temp.name, 'rt', encoding='utf-8') as exported:
            file_data = exported.read()

        check = self.assertIn if positive else self.assertNotIn
        for char in chars:
            check(char, file_data)