Ejemplo n.º 1
0
def main(args, _in):
    """Write out the features whose totals differ most between the two labels.

    Every non-blank line read from ``_in`` is passed to ``parse_bow`` and the
    results are loaded into a DataFrame. Values are summed per ``"label"``
    group; a feature is kept when the relative difference between its ``-1``
    and ``1`` sums is at least ``args.n`` and the combined sum is greater
    than 5. The kept feature names are written to ``args.m`` separated by
    newlines.

    Args:
        args: Object providing ``n`` (minimum relative difference) and ``m``
            (path of the file to write).
        _in: Readable text stream holding one record per line.
    """
    import pandas as pd

    records = []
    for raw in _in.read().split("\n"):
        stripped = raw.strip()
        if len(stripped) > 0:
            records.append(parse_bow(stripped))

    frame = pd.DataFrame(records)
    # After the transpose each row is a feature and each column a label value.
    totals = frame.groupby("label").sum().transpose()
    negative = totals[-1]
    positive = totals[1]
    totals["diff"] = abs(negative - positive) / (negative + positive)
    totals["usage"] = negative + positive

    distinct_enough = totals["diff"] >= args.n
    used_enough = totals["usage"] > 5
    selected = totals[distinct_enough & used_enough].index

    with open(args.m, "w") as out_file:
        out_file.write("\n".join(selected))
Ejemplo n.º 2
0
def main(args, _in):
    """Pick label-discriminating features and save their names to a file.

    Non-blank lines of ``_in`` are parsed with ``parse_bow`` into rows of a
    DataFrame, which is then summed per ``"label"`` value. For each feature
    the ``-1`` and ``1`` sums are compared: it is selected when
    ``|neg - pos| / (neg + pos)`` is at least ``args.n`` and ``neg + pos``
    exceeds 5. Selected names are joined with newlines and written to
    ``args.m``.

    Args:
        args: Object providing ``n`` (threshold on the relative difference)
            and ``m`` (destination file path).
        _in: Readable text stream with one record per line.
    """
    import pandas as pd

    stripped_lines = (text.strip() for text in _in.read().split("\n"))
    rows = [parse_bow(text) for text in stripped_lines if text]

    per_label = pd.DataFrame(rows).groupby("label").sum()
    # Transposed so that features are rows and label values are columns.
    stats = per_label.transpose()
    combined = stats[-1] + stats[1]
    gap = abs(stats[-1] - stats[1])
    stats["diff"] = gap / combined
    stats["usage"] = combined

    keep = (stats["diff"] >= args.n) & (stats["usage"] > 5)
    chosen = stats.loc[keep].index

    with open(args.m, "w") as sink:
        sink.write("\n".join(chosen))
Ejemplo n.º 3
0
def main(args, _in, _out):
    """Re-emit the input records restricted to the features in a mask file.

    The mask file ``args.m`` lists one column name per line. Every non-blank
    line of ``_in`` is parsed with ``parse_bow`` into a DataFrame row; only
    the masked columns are kept, rendered as space-separated ``name:value``
    pairs, and passed with the row's ``"label"`` to ``vw_model``. Each result
    is written to ``_out`` followed by a newline.

    Args:
        args: Object providing ``m``, the path of the mask file.
        _in: Readable text stream with one record per line.
        _out: Writable text stream receiving one formatted line per record.

    Raises:
        KeyError: If the mask names a column that is absent from the parsed
            data.
    """
    import pandas as pd
    from rank.util.feature import parse_bow, vw_model

    with open(args.m, "r") as f:
        # Skip blank entries: a trailing newline or an empty mask file would
        # otherwise put "" into the mask and make df[mask] raise KeyError.
        mask = [name
                for name in map(lambda x: x.strip(), f.read().split("\n"))
                if name
                ]

    data = [parse_bow(line)
            for line in map(lambda x: x.strip(), _in.read().split("\n"))
            if line
            ]

    df = pd.DataFrame(data)
    filtered = df[mask]
    for label, line in zip(df["label"], filtered.values):
        features = " ".join(
            "{0}:{1}".format(name, value)
            for name, value in zip(filtered.columns, line))
        _out.write("{0}\n".format(vw_model(features, label)))
Ejemplo n.º 4
0
def main(args, _in, _out):
    """Write each input record with only the columns named in the mask file.

    Column names are read from ``args.m`` (one per line). Non-blank lines of
    ``_in`` are parsed with ``parse_bow`` and collected into a DataFrame. For
    every row the masked columns are formatted as ``name:value`` pairs joined
    by spaces, handed to ``vw_model`` together with the row's ``"label"``
    value, and the result is written to ``_out`` on its own line.

    Args:
        args: Object providing ``m``, the path of the mask file.
        _in: Readable text stream with one record per line.
        _out: Writable text stream receiving one formatted line per record.

    Raises:
        KeyError: If a name in the mask is not a column of the parsed data.
    """
    import pandas as pd
    from rank.util.feature import parse_bow, vw_model

    with open(args.m, "r") as f:
        # Blank entries (trailing newline, empty mask file) are dropped;
        # left in, "" would be looked up as a column and raise KeyError.
        mask = [
            name for name in map(lambda x: x.strip(), f.read().split("\n"))
            if name
        ]

    data = [
        parse_bow(line) for line in map(lambda x: x.strip(),
                                        _in.read().split("\n"))
        if line
    ]

    df = pd.DataFrame(data)
    filtered = df[mask]
    for label, line in zip(df["label"], filtered.values):
        pairs = map(lambda x: "{0}:{1}".format(*x),
                    zip(filtered.columns, line))
        _out.write("{0}\n".format(vw_model(" ".join(pairs), label)))